diff --git a/Makefile.am b/Makefile.am index 77450602e..10a3aa739 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,545 +1,545 @@ ## This file is part of Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. confignicedir = $(sysconfdir)/build confignice_SCRIPTS=config.nice SUBDIRS = po config modules EXTRA_DIST = UNINSTALL THANKS RELEASE-NOTES configure-tests.py config.nice.in \ config.rpath # current MathJax version and packages - -MJV = 1.0.1a -MATHJAX = MathJax-v$(MJV).zip +# See also modules/miscutil/lib/htmlutils.py (get_mathjax_header) +MJV = 1.1 +MATHJAX = https://github.com/mathjax/MathJax/zipball/v$(MJV) # current FCKeditor version FCKV = 2.6.6 FCKEDITOR = FCKeditor_$(FCKV).zip # git-version-get stuff: BUILT_SOURCES = $(top_srcdir)/.version $(top_srcdir)/.version: echo $(VERSION) > $@-t && mv $@-t $@ dist-hook: echo $(VERSION) > $(distdir)/.tarball-version check-custom-templates: $(PYTHON) $(top_srcdir)/modules/webstyle/lib/template.py --check-custom-templates $(top_srcdir) kwalitee-check: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --stats $(top_srcdir) kwalitee-check-errors-only: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-errors $(top_srcdir) kwalitee-check-variables: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-variables $(top_srcdir) kwalitee-check-indentation: @$(PYTHON) $(top_srcdir)/modules/miscutil/lib/kwalitee.py --check-indentation $(top_srcdir) kwalitee-check-sql-queries: @echo "* Listing potentially dangerous SQL queries:" @echo "** SQL SELECT queries without explicit column list:" @find $(top_srcdir) -name '*.py' -exec grep -HEin 'SELECT \* FROM' {} \; 2> /dev/null @echo "** SQL INSERT queries without explicit column list:" @find $(top_srcdir) -name '*.py' -exec grep -HEin 'INSERT INTO ([[:alnum:]]|_)+[[:space:]]*VALUES' {} \; 2> /dev/null @find $(top_srcdir) -name '*.py' -exec grep -HEin 'INSERT INTO ([[:alnum:]]|_)+[[:space:]]*$$' {} \; 2> /dev/null @echo "** SQL queries using charset-ignorant escape_string():" @find $(top_srcdir) -name '*.py' -exec grep -HEin 'escape_string' {} \; 2> /dev/null @echo "** SQL queries using literal '%s':" @find $(top_srcdir) -name '*.py' -exec grep -HEin "run_sql.*'%[dfis]'" {} \; 2> /dev/null @find $(top_srcdir) -name '*.py' -exec grep -HEin 'run_sql.*"%[dfis]"' {} \; 2> /dev/null @echo "** SQL queries with potentially unescaped arguments:" @find $(top_srcdir) -name '*.py' -exec grep -HEin 'run_sql.* % ' {} \; 2> /dev/null @echo "* Done." 
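# Note: a minimal illustration (not part of the checks above) of the run_sql
# calling styles that the kwalitee greps try to catch, assuming the usual
# invenio.dbquery API; the table and column names here are hypothetical:
#
#   from invenio.dbquery import run_sql
#   run_sql("SELECT id FROM user WHERE email='%s'" % email)  # flagged: value pasted into the SQL string
#   run_sql("SELECT id FROM user WHERE email=%s", (email,))  # preferred: bound parameter, escaped by the driver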
etags: \rm -f $(top_srcdir)/TAGS (cd $(top_srcdir) && find $(top_srcdir) -name "*.py" -print | xargs etags) install-data-local: for d in / /cache /log /tmp /tmp-shared /data /run ; do \ mkdir -p $(localstatedir)$$d ; \ done @echo "************************************************************" @echo "** Invenio software has been successfully installed! **" @echo "** **" @echo "** You may proceed to customizing your installation now. **" @echo "************************************************************" install-mathjax-plugin: @echo "***********************************************************" @echo "** Installing MathJax plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-mathjax-plugin mkdir /tmp/invenio-mathjax-plugin + mkdir -p ${prefix}/var/www/MathJax (cd /tmp/invenio-mathjax-plugin && \ - wget 'http://www.mathjax.org/dl/$(MATHJAX)' && \ - unzip -q -u -d ${prefix}/var/www $(MATHJAX)) + wget '$(MATHJAX)' -O mathjax.zip --no-check-certificate && \ + unzip -q mathjax.zip && cd mathjax-MathJax-* && cp -ur * \ + ${prefix}/var/www/MathJax) rm -fr /tmp/invenio-mathjax-plugin - @echo "* Installing Invenio-specific MathJax config..." - (cd $(top_srcdir)/modules/webstyle/etc && make install) @echo "************************************************************" @echo "** The MathJax plugin was successfully installed. **" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS in invenio.conf. **" @echo "************************************************************" uninstall-mathjax-plugin: @rm -rvf ${prefix}/var/www/MathJax @echo "***********************************************************" @echo "** The MathJax plugin was successfully uninstalled. **" @echo "***********************************************************" install-jscalendar-plugin: @echo "***********************************************************" @echo "** Installing jsCalendar plugin, please wait... **" @echo "***********************************************************" rm -rf /tmp/invenio-jscalendar-plugin mkdir /tmp/invenio-jscalendar-plugin (cd /tmp/invenio-jscalendar-plugin && \ wget 'http://www.dynarch.com/static/jscalendar-1.0.zip' && \ unzip -u jscalendar-1.0.zip && \ mkdir -p ${prefix}/var/www/jsCalendar && \ cp jscalendar-1.0/img.gif ${prefix}/var/www/jsCalendar/jsCalendar.gif && \ cp jscalendar-1.0/calendar.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-setup.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/lang/calendar-en.js ${prefix}/var/www/jsCalendar/ && \ cp jscalendar-1.0/calendar-blue.css ${prefix}/var/www/jsCalendar/) rm -fr /tmp/invenio-jscalendar-plugin @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully installed. **" @echo "***********************************************************" uninstall-jscalendar-plugin: @rm -rvf ${prefix}/var/www/jsCalendar @echo "***********************************************************" @echo "** The jsCalendar plugin was successfully uninstalled. **" @echo "***********************************************************" install-jquery-plugins: @echo "***********************************************************" @echo "** Installing various jQuery plugins, please wait...
**" @echo "***********************************************************" mkdir -p ${prefix}/var/www/js (cd ${prefix}/var/www/js && \ wget http://jqueryjs.googlecode.com/files/jquery-1.3.1.min.js && \ mv jquery-1.3.1.min.js jquery.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/jquery-1.4.4.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.effects.core.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.effects.highlight.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.widget.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.tabs.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/ui/minified/ui.slider.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/ui/minified/ui.sortable.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.button.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.dialog.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.mouse.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.draggable.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.position.min.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.8.10/ui/minified/jquery.ui.resizable.min.js && \ wget http://www.appelsiini.net/download/jquery.jeditable.mini.js && \ wget http://github.com/malsup/form/raw/master/jquery.form.js --no-check-certificate && \ wget http://jquery-multifile-plugin.googlecode.com/svn/trunk/jquery.MultiFile.pack.js && \ wget http://plugins.jquery.com/files/jquery.autogrow-1.2.2.zip && \ wget http://tablesorter.com/jquery.tablesorter.zip && \ wget http://www.uploadify.com/wp-content/uploads/Uploadify-v2.1.3.zip -O uploadify.zip && \ wget http://www.datatables.net/download/build/jquery.dataTables.min.js && \ unzip jquery.tablesorter.zip && \ rm jquery.tablesorter.zip && \ unzip -u uploadify.zip && \ mv -T jquery.uploadify-v2.1.3 uploadify && \ wget http://flot.googlecode.com/files/flot-0.6.zip && \ wget http://trentrichardson.com/examples/timepicker/js/jquery-ui-timepicker-addon.js && \ unzip -u flot-0.6.zip && \ mv flot/jquery.flot.selection.min.js flot/jquery.flot.min.js flot/excanvas.min.js ./ && \ rm flot-0.6.zip && rm -r flot && \ mv uploadify/swfobject.js ./ && \ mv uploadify/cancel.png uploadify/uploadify.css uploadify/uploadify.allglyphs.swf uploadify/uploadify.fla uploadify/uploadify.swf ../img/ && \ mv uploadify/jquery.uploadify.v2.1.3.min.js ./jquery.uploadify.min.js && \ rm uploadify.zip && rm -r uploadify && \ unzip jquery.autogrow-1.2.2.zip jquery.autogrow.js && \ rm jquery.autogrow-1.2.2.zip && \ wget --no-check-certificate https://github.com/douglascrockford/JSON-js/raw/master/json2.js && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/ui/minified/ui.datepicker.min.js && \ wget -O jquery.hotkeys.min.js http://js-hotkeys.googlecode.com/files/jquery.hotkeys-0.7.8-packed.js && \ wget http://plugins.jquery.com/files/jquery.treeview_3.zip && \ unzip jquery.treeview_3.zip && \ rm jquery.treeview_3.zip && \ wget http://plugins.jquery.com/files/jquery.ajaxPager.js_5.txt && \ mv jquery.ajaxPager.js_5.txt jquery.ajaxPager.js && \ wget http://jqueryui.com/download/jquery-ui-1.7.3.custom.zip && \ unzip jquery-ui-1.7.3.custom.zip development-bundle/ui/ui.core.js js/jquery-ui-1.7.3.custom.min.js && \ mv development-bundle/ui/ui.core.js 
ui.core.js && \ mv js/jquery-ui-1.7.3.custom.min.js jquery-ui-1.7.3.custom.min.js && \ rm -rf development-bundle && \ rm jquery-ui-1.7.3.custom.zip && \ mkdir -p ${prefix}/var/www/img && \ cd ${prefix}/var/www/img && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/base/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/smoothness/ && \ wget -r -np -nH --cut-dirs=4 -A "png,css" -P jquery-ui/themes http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/redmond/ && \ wget --no-check-certificate -O datatables_jquery-ui.css https://github.com/DataTables/DataTables/raw/master/media/css/demo_table_jui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/redmond/jquery-ui.css && \ wget http://jquery-ui.googlecode.com/svn/tags/1.7.3/demos/images/calendar.gif && \ wget -r -np -nH --cut-dirs=5 -A "png" http://jquery-ui.googlecode.com/svn/tags/1.7.3/themes/redmond/images/) @echo "***********************************************************" @echo "** The jQuery plugins were successfully installed. **" @echo "***********************************************************" uninstall-jquery-plugins: (cd ${prefix}/var/www/js && \ rm -f jquery.min.js && \ rm -f jquery.effects.core.min.js && \ rm -f jquery.effects.highlight.min.js && \ rm -f jquery.MultiFile.pack.js && \ rm -f jquery.jeditable.mini.js && \ rm -f jquery.flot.selection.min.js && \ rm -f jquery.flot.min.js && \ rm -f excanvas.min.js && \ rm -f jquery-ui-timepicker-addon.min.js && \ rm -f jquery.tablesorter.js && \ rm -f jquery.tablesorter.pager.js && \ rm -f ui.datepicker.min.js && \ rm -f jquery.autogrow.js && \ rm -f json2.js && \ rm -f jquery.uploadify.min.js && \ rm -f jquery.ui.slider.min.js && \ rm -f jquery.ui.sortable.min.js && \ rm -f jquery.ui.tabs.min.js && \ rm -rf tablesorter && \ rm -f jquery.hotkeys.min.js && \ rm -rf jquery-treeview && \ rm -f jquery.ajaxPager.js && \ rm -f jquery.form.js && \ rm -f jquery.dataTables.min.js && \ rm -f ui.core.js) && \ (cd ${prefix}/var/www/img && \ rm -f cancel.png uploadify.css uploadify.swf uploadify.allglyphs.swf uploadify.fla && \ rm -f datatables_jquery-ui.css) @echo "***********************************************************" @echo "** The jquery plugins were successfully uninstalled. **" @echo "***********************************************************" install-fckeditor-plugin: @echo "***********************************************************" @echo "** Installing FCKeditor plugin, please wait... 
**" @echo "***********************************************************" rm -rf ${prefix}/lib/python/invenio/fckeditor/ rm -rf /tmp/invenio-fckeditor-plugin mkdir /tmp/invenio-fckeditor-plugin (cd /tmp/invenio-fckeditor-plugin && \ wget 'http://downloads.sourceforge.net/fckeditor/$(FCKEDITOR)' && \ unzip -u -d ${prefix}/var/www $(FCKEDITOR)) && \ mkdir -p ${prefix}/lib/python/invenio/fckeditor/editor/filemanager/connectors/py && \ mv -f ${prefix}/var/www/fckeditor/fckeditor.py ${prefix}/lib/python/invenio/fckeditor/ && \ mv -f ${prefix}/var/www/fckeditor/editor/filemanager/connectors/py/*.py ${prefix}/lib/python/invenio/fckeditor/editor/filemanager/connectors/py/ && \ rm -f ${prefix}/var/www/fckeditor/editor/filemanager/connectors/py/upload.py && \ rm -f ${prefix}/var/www/fckeditor/editor/filemanager/connectors/py/zope.py && \ find ${prefix}/lib/python/invenio/fckeditor -type d -exec touch {}/__init__.py \; && \ find ${prefix}/var/www/fckeditor/ -depth -name '_*' -exec rm -rf {} \; && \ rm -r ${prefix}/var/www/fckeditor/editor/filemanager/connectors && \ find ${prefix}/var/www/fckeditor/fckeditor* -maxdepth 0 ! -name "fckeditor.js" -exec rm -r {} \; && \ rm -fr /tmp/invenio-fckeditor-plugin sed -i "s/if (FCKBrowserInfo\.IsSafari){FCKTools\.AddEventListener(FCK\.EditorDocument,'paste'/if (FCKBrowserInfo.IsSafari \&\& false){FCKTools.AddEventListener(FCK.EditorDocument,'paste'/" ${prefix}/var/www/fckeditor/editor/js/fckeditorcode_gecko.js # Fix issue described at , affecting v2.6.5, 2.6.6 @echo "* Installing Invenio-specific FCKeditor config..." (cd $(top_srcdir)/modules/webstyle/etc && make install) @echo "***********************************************************" @echo "** The FCKeditor plugin was successfully installed. **" @echo "** Please do not forget to properly set the option **" @echo "** CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR in invenio.conf. **" @echo "***********************************************************" uninstall-fckeditor-plugin: @rm -rvf ${prefix}/var/www/fckeditor @rm -rvf ${prefix}/lib/python/invenio/fckeditor @echo "***********************************************************" @echo "** The FCKeditor plugin was successfully uninstalled. **" @echo "***********************************************************" install-pdfa-helper-files: @echo "***********************************************************" @echo "** Installing PDF/A helper files, please wait... **" @echo "***********************************************************" wget 'http://invenio-software.org/download/invenio-demo-site-files/ISOCoatedsb.icc' -O ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully installed. **" @echo "***********************************************************" uninstall-pdfa-helper-files: rm -f ${prefix}/etc/websubmit/file_converter_templates/ISOCoatedsb.icc @echo "***********************************************************" @echo "** The PDF/A helper files were successfully uninstalled. 
**" @echo "***********************************************************" update-v0.3.0-tables update-v0.3.1-tables: echo "ALTER TABLE idxINDEXNAME CHANGE id_idxINDEX id_idxINDEX mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHODNAME CHANGE id_rnkMETHOD id_rnkMETHOD mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE collectionname CHANGE id_collection id_collection mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE formatname CHANGE id_format id_format mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "ALTER TABLE fieldname CHANGE id_field id_field mediumint(9) unsigned NOT NULL FIRST;" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runbibrank','run BibRank','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgbibrank','configure BibRank','','no');" | ${prefix}/bin/dbexec update-v0.3.2-tables: echo "ALTER TABLE sbmCOLLECTION_sbmDOCTYPE CHANGE id_son id_son char(10) NOT NULL default '0';" | ${prefix}/bin/dbexec update-v0.3.3-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE flxLINKTYPEPARAMS CHANGE pname pname varchar(78) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHOD DROP star_category_ranges;" | ${prefix}/bin/dbexec echo "DROP TABLE rnkSET;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE arguments arguments LONGTEXT;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE status status varchar(50);" | ${prefix}/bin/dbexec update-v0.5.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE session ADD INDEX uid (uid);" | ${prefix}/bin/dbexec echo "UPDATE idxINDEXNAME SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE rnkMETHODNAME SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE collectionname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE collection_portalbox SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE formatname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE fieldname SET ln='cs' WHERE ln='cz';" | ${prefix}/bin/dbexec echo "UPDATE idxINDEXNAME SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE rnkMETHODNAME SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE collectionname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE collection_portalbox SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE formatname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec echo "UPDATE fieldname SET ln='sv' WHERE ln='se';" | ${prefix}/bin/dbexec update-v0.7.1-tables: echo "DROP TABLE oaiHARVEST;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgbibharvest','configure BibHarvest','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runoaiharvest','run BibHarvest oaiharvest','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'cfgwebcomment','configure WebComment','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runoaiarchive','run BibHarvest 
oaiarchive','','no');" | ${prefix}/bin/dbexec echo "INSERT INTO accACTION (id,name,description,allowedkeywords,optional) VALUES (NULL,'runbibedit','run BibEdit','','no');" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD nickname varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD last_login datetime NOT NULL default '0000-00-00 00:00:00';" | ${prefix}/bin/dbexec echo "ALTER TABLE user ADD INDEX nickname (nickname);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmFIELD CHANGE subname subname varchar(13) default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE user_query_basket CHANGE alert_name alert_name varchar(30) NOT NULL default '';" | ${prefix}/bin/dbexec echo "TRUNCATE TABLE session;" | ${prefix}/bin/dbexec @echo "**********************************************************" @echo "** Do not forget to run the basket migration now: **" @echo "** @PYTHON@ modules/webbasket/lib/webbasket_migration_kit.py " @echo "** Please see the RELEASE-NOTES for details. **" @echo "**********************************************************" @echo "INSERT INTO oaiARCHIVE (id, setName, setSpec, setDescription, setDefinition, setRecList) SELECT id, setName, setSpec, CONCAT_WS('', setDescription), setDefinition, setRecList FROM oaiSET;" update-v0.90.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE format ADD COLUMN (description varchar(255) default '');" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN (content_type varchar(255) default '');" | ${prefix}/bin/dbexec update-v0.90.1-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE schTASK ADD INDEX status (status);" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD INDEX runtime (runtime);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD COLUMN score TINYINT UNSIGNED NOT NULL DEFAULT 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD PRIMARY KEY (doctype, sname);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmCATEGORIES ADD KEY doctype (doctype);" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST ADD COLUMN setspecs TEXT NOT NULL DEFAULT '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE setDescription setDescription text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p1 p1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f1 f1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m1 m1 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p2 p2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f2 f2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m2 m2 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE p3 p3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE f3 f3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiARCHIVE CHANGE m3 m3 text NOT NULL default '';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=0 WHERE status='';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=1 WHERE status='deleted';" | ${prefix}/bin/dbexec echo "ALTER TABLE fmtKNOWLEDGEBASES add COLUMN kbtype char default NULL;" | ${prefix}/bin/dbexec update-v0.92.0-tables: echo "UPDATE bibdoc SET status=0 WHERE status='';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status=1 WHERE status='deleted';" | 
${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE arguments arguments mediumblob;" | ${prefix}/bin/dbexec echo "UPDATE user SET note=1 WHERE nickname='admin' AND note IS NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup CHANGE name name varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup ADD login_method varchar(255) NOT NULL default 'INTERNAL';" | ${prefix}/bin/dbexec echo "ALTER TABLE usergroup ADD UNIQUE KEY login_method_name (login_method(70), name);" | ${prefix}/bin/dbexec echo "ALTER TABLE user CHANGE settings settings blob default NULL;" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Get_Recid', 'This function gets the recid for a document with a given report-number (as stored in the global variable rn).');" | ${prefix}/bin/dbexec update-v0.92.1-tables: echo "DROP TABLE rnkCITATIONDATA;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "UPDATE bibdoc SET status='DELETED' WHERE status='1';" | ${prefix}/bin/dbexec echo "UPDATE bibdoc SET status='' WHERE status='0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibrec ADD KEY creation_date (creation_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibrec ADD KEY modification_date (modification_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY creation_date (creation_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY modification_date (modification_date);" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD KEY docname (docname);" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST CHANGE postprocess postprocess varchar(20) NOT NULL default 'h';" | ${prefix}/bin/dbexec echo "ALTER TABLE oaiHARVEST ADD COLUMN bibfilterprogram varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE idxINDEXNAME CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE idxINDEX ADD COLUMN stemming_language VARCHAR(10) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkMETHODNAME CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkDOWNLOADS CHANGE id_bibdoc id_bibdoc mediumint(9) unsigned default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkDOWNLOADS CHANGE file_format file_format varchar(10) NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE collectionname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection_portalbox CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE format ADD COLUMN visibility TINYINT NOT NULL default 1;" | ${prefix}/bin/dbexec echo "ALTER TABLE formatname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE fieldname CHANGE ln ln char(5) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE accROLE ADD COLUMN firerole_def_ser blob NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE accROLE ADD COLUMN firerole_def_src text NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE user_accROLE ADD COLUMN expiration datetime NOT NULL default '9999-12-31 23:59:59';" | ${prefix}/bin/dbexec echo "ALTER TABLE user DROP INDEX id, ADD PRIMARY KEY id (id);" | ${prefix}/bin/dbexec echo -e 'from invenio.dbquery import run_sql;\ map(lambda index_id: run_sql("ALTER TABLE idxPHRASE%02dF CHANGE term term TEXT NULL DEFAULT NULL, DROP INDEX term, ADD INDEX term (term (50))" % index_id[0]), run_sql("select id from idxINDEX"))' | $(PYTHON) echo "INSERT INTO rnkCITATIONDATA VALUES (1,'citationdict','','');" | 
${prefix}/bin/dbexec echo "INSERT INTO rnkCITATIONDATA VALUES (2,'reversedict','','');" | ${prefix}/bin/dbexec echo "INSERT INTO rnkCITATIONDATA VALUES (3,'selfcitdict','','');" | ${prefix}/bin/dbexec update-v0.99.0-tables: ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "ALTER TABLE bibdoc ADD COLUMN more_info mediumblob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD COLUMN priority tinyint(4) NOT NULL default 0;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK ADD KEY priority (priority);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA DROP PRIMARY KEY;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD PRIMARY KEY (id);" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA CHANGE id id mediumint(8) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE rnkCITATIONDATA ADD UNIQUE KEY object_name (object_name);" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE value value text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmAPPROVAL ADD note text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstDOCUMENT CHANGE docsize docsize bigint(15) unsigned NOT NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtACTIONHISTORY CHANGE client_host client_host int(10) unsigned default NULL;" | ${prefix}/bin/dbexec update-v0.99.1-tables: @echo "Nothing to do; table structure did not change between v0.99.1 and v0.99.2." update-v0.99.2-tables: @echo "Nothing to do; table structure did not change between v0.99.2 and v0.99.3." update-v0.99.3-tables: # from v0.99.3 to v1.0.0-rc0 echo "RENAME TABLE oaiARCHIVE TO oaiREPOSITORY;" | ${prefix}/bin/dbexec ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO knwKB (id,name,description,kbtype) SELECT id,name,description,'' FROM fmtKNOWLEDGEBASES;" | ${prefix}/bin/dbexec echo "INSERT INTO knwKBRVAL (id,m_key,m_value,id_knwKB) SELECT id,m_key,m_value,id_fmtKNOWLEDGEBASES FROM fmtKNOWLEDGEBASEMAPPINGS;" | ${prefix}/bin/dbexec echo "ALTER TABLE sbmPARAMETERS CHANGE name name varchar(40) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE docname docname varchar(250) COLLATE utf8_bin NOT NULL default 'file';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc CHANGE status status text NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bibdoc ADD COLUMN text_extraction_date datetime NOT NULL default '0000-00-00';" | ${prefix}/bin/dbexec echo "ALTER TABLE collection DROP COLUMN restricted;" | ${prefix}/bin/dbexec echo "ALTER TABLE schTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE hstTASK CHANGE host host varchar(255) NOT NULL default '';" | ${prefix}/bin/dbexec echo "ALTER TABLE bib85x DROP INDEX kv, ADD INDEX kv (value(100));" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/HEP.rdf' WHERE name='HEP' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE clsMETHOD SET location='http://invenio-software.org/download/invenio-demo-site-files/NASA-subjects.rdf' WHERE name='NASA-subjects' AND location='';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='runoairepository', description='run oairepositoryupdater task' WHERE name='runoaiarchive';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET name='cfgoaiharvest', description='configure OAI Harvest' WHERE name='cfgbibharvest';" | ${prefix}/bin/dbexec echo "ALTER TABLE accARGUMENT CHANGE 
value value varchar(255);" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='doctype,act,categ' WHERE name='submit';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('categ','*');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='submit' and accARGUMENT.keyword='categ' and accARGUMENT.value='*';" | ${prefix}/bin/dbexec echo "UPDATE accACTION SET allowedkeywords='name,with_editor_rights' WHERE name='cfgwebjournal';" | ${prefix}/bin/dbexec echo "INSERT INTO accARGUMENT(keyword,value) VALUES ('with_editor_rights','yes');" | ${prefix}/bin/dbexec echo "INSERT INTO accROLE_accACTION_accARGUMENT(id_accROLE,id_accACTION,id_accARGUMENT,argumentlistid) SELECT DISTINCT raa.id_accROLE,raa.id_accACTION,accARGUMENT.id,raa.argumentlistid FROM accROLE_accACTION_accARGUMENT as raa JOIN accACTION on id_accACTION=accACTION.id,accARGUMENT WHERE accACTION.name='cfgwebjournal' and accARGUMENT.keyword='with_editor_rights' and accARGUMENT.value='yes';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC CHANGE id id int(15) unsigned NOT NULL auto_increment;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD external_id int(15) NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD collection_id int(15) unsigned NOT NULL default '0';" | ${prefix}/bin/dbexec echo "ALTER TABLE bskEXTREC ADD original_url text;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD status char(2) NOT NULL default 'ok';" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY status (status);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Photos_to_Storage','Attach/edit the pictures uploaded with the \"create_photos_manager_interface()\" function');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Photos',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a photos upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Photos_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. 
More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\n\r\nfrom invenio.websubmit_functions.ParamFile import ParamFromFile\r\nfrom invenio.websubmit_functions.Move_Photos_to_Storage import read_param_file, create_photos_manager_interface, get_session_id\r\n\r\n# Retrieve session id\r\ntry:\r\n # User info is defined only in MBI/MPI actions...\r\n session_id = get_session_id(None, uid, user_info) \r\nexcept:\r\n session_id = get_session_id(req, uid, {})\r\n\r\n# Retrieve context\r\nindir = curdir.split(\'/\')[-3]\r\ndoctype = curdir.split(\'/\')[-2]\r\naccess = curdir.split(\'/\')[-1]\r\n\r\n# Get the record ID, if any\r\nsysno = ParamFromFile(\"%s/%s\" % (curdir,\'SN\')).strip()\r\n\r\n\"\"\"\r\nModify below the configuration of the photos manager interface.\r\nNote: \'can_reorder_photos\' parameter is not yet fully taken into consideration\r\n\r\nDocumentation of the function is available by running:\r\necho -e \'from invenio.websubmit_functions.Move_Photos_to_Storage import create_photos_manager_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext += create_photos_manager_interface(sysno, session_id, uid,\r\n doctype, indir, curdir, access,\r\n can_delete_photos=True,\r\n can_reorder_photos=True,\r\n can_upload_photos=True,\r\n editor_width=700,\r\n editor_height=400,\r\n initial_slider_value=100,\r\n max_slider_value=200,\r\n min_slider_value=80)','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Photos_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFIELDDESC VALUES ('Upload_Files',NULL,'','R',NULL,NULL,NULL,NULL,NULL,'\"\"\"\r\nThis is an example of element that creates a file upload interface.\r\nClone it, customize it and integrate it into your submission. Then add function \r\n\'Move_Uploaded_Files_to_Storage\' to your submission functions list, in order for files \r\nuploaded with this interface to be attached to the record. More information in \r\nthe WebSubmit admin guide.\r\n\"\"\"\r\nfrom invenio.websubmit_managedocfiles import create_file_upload_interface\r\nfrom invenio.websubmit_functions.Shared_Functions import ParamFromFile\r\n\r\nindir = ParamFromFile(os.path.join(curdir, \'indir\'))\r\ndoctype = ParamFromFile(os.path.join(curdir, \'doctype\'))\r\naccess = ParamFromFile(os.path.join(curdir, \'access\'))\r\ntry:\r\n sysno = int(ParamFromFile(os.path.join(curdir, \'SN\')).strip())\r\nexcept:\r\n sysno = -1\r\nln = ParamFromFile(os.path.join(curdir, \'ln\'))\r\n\r\n\"\"\"\r\nRun the following to get the list of parameters of function \'create_file_upload_interface\':\r\necho -e \'from invenio.websubmit_managedocfiles import create_file_upload_interface as f\\nprint f.__doc__\' | python\r\n\"\"\"\r\ntext = create_file_upload_interface(recid=sysno,\r\n print_outside_form_tag=False,\r\n include_headers=True,\r\n ln=ln,\r\n doctypes_and_desc=[(\'main\',\'Main document\'),\r\n (\'additional\',\'Figure, schema, etc.\')],\r\n can_revise_doctypes=[\'*\'],\r\n can_describe_doctypes=[\'main\'],\r\n can_delete_doctypes=[\'additional\'],\r\n can_rename_doctypes=[\'main\'],\r\n sbm_indir=indir, sbm_doctype=doctype, sbm_access=access)[1]\r\n','0000-00-00','0000-00-00',NULL,NULL,0);" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','forceFileRevision');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Create_Upload_Files_Interface','Display generic interface to add/revise/delete files. 
To be used before function \"Move_Uploaded_Files_to_Storage\"');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Uploaded_Files_to_Storage','Attach files uploaded with \"Create_Upload_Files_Interface\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','elementNameToDoctype');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Revised_Files_to_Storage','keepPreviousVersionDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_Revised_Files_to_Storage','Revise files initially uploaded with \"Move_Files_to_Storage\"')" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','minsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','doctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDeleteDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canReviseDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canDescribeDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canCommentDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canKeepDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canAddFormatDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRestrictDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canRenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','canNameNewFiles');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','createRelatedFormats');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','keepDefault');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','showLinks');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','fileLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','filenameLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','descriptionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','commentLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','restrictionLabel');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','startDoc');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','endDoc');" | 
${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','defaultFilenameDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Create_Upload_Files_Interface','maxFilesDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','iconsize');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Uploaded_Files_to_Storage','createIconDoctypes');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Report_Number_Generation','nblength');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Second_Report_Number_Generation','2nd_nb_length');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Get_Recid','record_search_pattern');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Move_FCKeditor_Files_to_Storage','Transfer files attached to the record with the FCKeditor');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_FCKeditor_Files_to_Storage','input_fields');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','layer');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Replace_Single_File_Approval','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Stamp_Uploaded_Files','switch_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_restrictions');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Move_Files_to_Storage','paths_and_doctypes');" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD round_name varchar(255) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD restriction varchar(50) NOT NULL default ''" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0'" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD KEY in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT);" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD reply_order_cached_data blob NULL default NULL;" | ${prefix}/bin/dbexec echo "ALTER TABLE cmtRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo "ALTER TABLE bskRECORDCOMMENT ADD INDEX (reply_order_cached_data(40));" | ${prefix}/bin/dbexec echo -e 'from invenio.webcommentadminlib import migrate_comments_populate_threads_index;\ migrate_comments_populate_threads_index()' | $(PYTHON) echo -e 'from invenio.access_control_firerole import repair_role_definitions;\ repair_role_definitions()' | $(PYTHON) update-v1.0.0-rc0-tables: # from v1.0.0-rc0 to next release ${prefix}/bin/dbexec < $(top_srcdir)/modules/miscutil/sql/tabcreate.sql echo "INSERT INTO sbmALLFUNCDESCR VALUES ('Set_Embargo','Set an embargo on all the documents of a given record.');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('Set_Embargo','date_file');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES 
('Set_Embargo','date_format');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_role');" | ${prefix}/bin/dbexec echo "INSERT INTO sbmFUNDESC VALUES ('User_is_Record_Owner_or_Curator','curator_flag');" | ${prefix}/bin/dbexec echo "INSERT INTO format (name, code, description, content_type, visibility) VALUES ('Podcast', 'xp', 'Sample format suitable for multimedia feeds, such as podcasts', 'application/rss+xml', 0);" | ${prefix}/bin/dbexec CLEANFILES = *~ *.pyc *.tmp diff --git a/config/invenio.conf b/config/invenio.conf index 7faee32d8..32881a90f 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,1321 +1,1336 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of an Invenio installation, as coming out of the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs, but this is not advised. ## ## If you would like to customize some of these parameters, you should ## rather create a file named 'invenio-local.conf' in the same ## directory where 'invenio.conf' lives and you should write there ## only the customizations that you want to be different from the ## vanilla defaults.
## ## Here is a realistic, minimalist, yet production-ready example of ## what you would typically put there: ## ## $ cat /opt/invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_NAME = John Doe's Document Server ## CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com ## CFG_DATABASE_HOST = localhost ## CFG_DATABASE_NAME = invenio ## CFG_DATABASE_USER = invenio ## CFG_DATABASE_PASS = my123p$ss ## ## You should override at least the parameters mentioned above and the ## parameters mentioned in the `Part 1: Essential parameters' below in ## order to define some very essential runtime parameters such as the ## name of your document server (CFG_SITE_NAME and ## CFG_SITE_NAME_INTL_*), the visible URL of your document server ## (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email address of the ## local Invenio administrator, comment moderator, and alert engine ## (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_ADMIN_EMAIL, etc), and last but ## not least your database credentials (CFG_DATABASE_*). ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have specified in ## your local file. This cascading of configuration parameters will ## ease your future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local Invenio administrator. ## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_PORT = 3306 CFG_DATABASE_NAME = invenio CFG_DATABASE_USER = invenio CFG_DATABASE_PASS = my123p$ss ## CFG_SITE_URL - specify URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not leave a ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify secure URL under which your ## installation secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not leave a ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your Invenio installation. CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.)
CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Институт Фиктивных Наук Атлантиды CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 CFG_SITE_NAME_INTL_hu = Kitalált Tudományok Atlantiszi Intézete CFG_SITE_NAME_INTL_af = Atlantis Instituut van Fiktiewe Wetenskap CFG_SITE_NAME_INTL_gl = Instituto Atlantis de Ciencia Fictive CFG_SITE_NAME_INTL_ro = Institutul Atlantis al Ştiinţelor Fictive CFG_SITE_NAME_INTL_rw = Atlantis Ishuri Rikuru Ry'ubuhanga CFG_SITE_NAME_INTL_ka = ატლანტიდის ფიქტიური მეცნიერების ინსტიტუტი CFG_SITE_NAME_INTL_lt = Fiktyvių Mokslų Institutas Atlantis ## CFG_SITE_LANG -- the default language of the interface: CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. Currently supported languages ## include Afrikaans, Bulgarian, Catalan, Czech, German, Georgian, ## Greek, English, Spanish, French, Croatian, Hungarian, Galician, ## Italian, Japanese, Kinyarwanda, Lithuanian, Norwegian, Polish, ## Portuguese, Romanian, Russian, Slovak, Swedish, Ukrainian, Chinese ## (China), Chinese (Taiwan), so that the eventual maximum you can ## currently select is ## "af,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW". CFG_SITE_LANGS = af,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = info@invenio-software.org ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and log in with ## this address when using Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = info@invenio-software.org ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES -- list of email addresses to ## which an email should be sent in case of emergency (e.g. bibsched ## queue has been stopped because of an error). Configuration ## dictionary allows for different recipients based on weekday and ## time-of-day.
Example: ## ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = { ## 'Sunday 22:00-06:00': '0041761111111@email2sms.foo.com', ## '06:00-18:00': 'team-in-europe@foo.com,0041762222222@email2sms.foo.com', ## '18:00-06:00': 'team-in-usa@foo.com', ## '*': 'john.doe.phone@foo.com'} ## ## If you want the emergency email notifications to always go to the ## same address, just use the wildcard line in the above example. CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {} ## CFG_SITE_ADMIN_EMAIL_EXCEPTIONS -- set this to 0 if you do not want ## to receive any captured exception via email to CFG_SITE_ADMIN_EMAIL ## address. Captured exceptions will still be available in ## var/log/invenio.err file. Set this to 1 if you want to receive ## some of the captured exceptions (this depends on the actual place ## where the exception is captured). Set this to 2 if you want to ## receive all captured exceptions. CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1 ## CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER -- set this to ## the number of seconds after which to reset the exception notification ## counter. A given repetitive exception is notified via email with a ## logarithmic strategy: the first time it is seen it is sent via email, ## then the second time, then the fourth, then the eighth and so forth. ## If the number of seconds elapsed since the last time it was notified ## is greater than CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER ## then the internal counter is reset in order not to have exception ## notifications become more and more rare. CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400 ## CFG_CERN_SITE -- do we want to enable CERN-specific code? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_INSPIRE_SITE = 0 ## CFG_ADS_SITE -- do we want to enable ADS-specific code? ## Put "1" for "yes" and "0" for "no". CFG_ADS_SITE = 0 ## CFG_OPENAIRE_SITE -- do we want to enable OpenAIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_OPENAIRE_SITE = 0 ## CFG_DEVEL_SITE -- is this a development site? If it is, you might ## prefer that it does not do certain things. For example, you might ## not want WebSubmit to send certain emails or trigger certain ## processes on a development site. ## Put "1" for "yes" (this is a development site) or "0" for "no" ## (this isn't a development site.) CFG_DEVEL_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important one is ## the 'template skin' you would like to use and the obfuscation mode ## for your email addresses. Please refer to the WebStyle Admin Guide ## for more explanation. The other variables are listed here mostly ## for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE -- How do we "protect" ## email addresses from undesired automated email harvesters? This ## setting will not affect 'support' and 'admin' emails. ## NOTE: there is no ultimate solution to protect against email ## harvesting. All have drawbacks and can more or less be ## circumvented. Choose your preferred mode ([t] means "transparent" ## for the user): ## -1: hide all emails. ## [t] 0 : no protection, email returned as is. ## foo@example.com => foo@example.com ## 1 : basic email munging: replaces @ by [at] and .
by [dot] ## foo@example.com => foo [at] example [dot] com ## [t] 2 : transparent name mangling: characters are replaced by ## equivalent HTML entities. ## foo@example.com => foo@example.com ## [t] 3 : javascript insertion. Requires Javascript enabled on client ## side. ## 4 : replaces @ and . characters by gif equivalents. ## foo@example.com => foo [at] example [dot] com CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2 ## CFG_WEBSTYLE_INSPECT_TEMPLATES -- Do we want to debug all template ## functions so that they would return HTML results wrapped in ## comments indicating which part of HTML page was created by which ## template function? Useful only for debugging Pythonic HTML ## templates. See WebStyle Admin Guide for more information. CFG_WEBSTYLE_INSPECT_TEMPLATES = 0 ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML ## left top box: CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global ## HTML left bottom box: CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global ## HTML right top box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global ## HTML right bottom box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ## CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST -- when certain HTTP status ## codes are raised to the WSGI handler, the corresponding exceptions ## and error messages can be sent to the system administrator for ## inspection. This is useful to detect and correct errors. The ## variable represents a comma-separated list of HTTP statuses that ## should alert admin. Wildcards are possible. If the status is ## followed by an "r", it means that a referer is required to exist ## (useful to distinguish broken known links from URL typos when 404 ## errors are raised). CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = 404r,400,5*,41* ################################## ## Part 3: WebSearch parameters ## ################################## ## This section contains some configuration parameters for WebSearch ## module. Please note that WebSearch is mostly configured at ## run-time via its WebSearch Admin web interface. The parameters ## below are the ones that you probably do not want to modify very ## often during runtime. (Note that you may modify them ## afterwards too, though.) ## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries do we want to ## cache in memory per one Apache httpd process? This cache is used ## mainly for "next/previous page" functionality, but it caches also ## "popular" user queries if more than one user happens to search for ## the same thing. Note that large numbers may lead to great memory ## consumption. We recommend a value not greater than 100. CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 0 ## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older ## system, you may want to map field codes of your old system (such as ## 'ti') to Invenio/MySQL ("title"). Use Python dictionary syntax ## for the translation table, e.g. {'wau':'author', 'wti':'title'}. ## Usually you don't want to do that, and you would use an empty dict {}. CFG_WEBSEARCH_FIELDS_CONVERT = {} ## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the light search interface, in ## characters. CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60 ## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search ## pattern window in the simple search interface, in characters.
## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the search
## pattern window in the light search interface, in characters.
CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60

## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search
## pattern window in the simple search interface, in characters.
CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40

## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the
## search pattern window in the advanced search interface, in
## characters.
CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30

## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still
## want to sort?  For higher numbers we print only a warning and do
## not perform any sorting other than the default 'latest records
## first', as sorting would be very time consuming then.  We recommend
## a value of not more than a couple of thousand.
CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000

## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but
## it was not preformatted in the "HTML brief" format, do we want to
## call BibFormat on the fly?  Put "1" for "yes" and "0" for "no".
## Note that "1" will display the record exactly as if it were fully
## preformatted, but it may be slow due to on-the-fly processing; "0"
## will display a default format very fast, but it may not have all
## the fields of the fully preformatted HTML brief format.  Note also
## that this option is active only for old (PHP) formats; the new
## (Python) formats are called on the fly by default anyway, since
## they are much faster.  When unsure, please set "0" here.
CFG_WEBSEARCH_CALL_BIBFORMAT = 0

## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs
## visible rather than MySQL's record IDs?  You may use this if you
## migrate from a different e-doc system and you store your old system
## numbers in 970__a.  Put "1" for "yes" and "0" for "no".  Usually
## you don't want to do that, though.
CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0

## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- put "1" if you want the
## "Latest Additions" in the web collection pages to show
## internationalized records.  Useful only if your brief BibFormat
## templates contain internationalized strings.  Otherwise put "0" in
## order not to slow down the creation of latest additions by WebColl.
CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0

## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display
## under 'Latest Additions' in the web collection pages.
CFG_WEBSEARCH_INSTANT_BROWSE = 10

## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to
## display in the RSS feed.
CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25

## CFG_WEBSEARCH_RSS_I18N_COLLECTIONS -- comma-separated list of
## collections that feature an internationalized RSS feed on their
## main search interface page created by webcoll.  Other collections
## will have an RSS feed using CFG_SITE_LANG.
CFG_WEBSEARCH_RSS_I18N_COLLECTIONS =

## CFG_WEBSEARCH_RSS_TTL -- number of minutes indicating how long a
## feed cache is valid.
CFG_WEBSEARCH_RSS_TTL = 360

## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of requests
## kept in cache.  If the cache is filled, subsequent requests are not
## cached.
CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000

## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names
## to print explicitly; for more, print "et al".  Note that this is
## used in the default formatting that is seldom used, as usually
## BibFormat defines all the formats.  The value below is only used
## when BibFormat fails, for example.
CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3

## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show
## collection grandsons in Narrow Search boxes or not (sons are shown
## by default, grandsons are configurable here).  Use 0 for no and 1
## for yes.
CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1

## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we
## create help links for Ellis, Nick or Ellis, Nicholas and friends
## when Ellis, N was searched for?  Useful if you have one author
## stored in the database under several name formats, namely both
## "surname comma firstname" and "surname comma initial" cataloging
## policies.  Use 0 for no and 1 for yes.
CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1

## CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS -- MathJax is a JavaScript
## library that renders (La)TeX mathematical formulas in the client
## browser.  This parameter must contain a comma-separated list of
## output formats for which to apply the MathJax rendering, for
## example "hb,hd".  If the list is empty, MathJax is disabled.
CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS =

## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching
## external collections (e.g. SPIRES, CiteSeer, etc.), how many
## seconds do we wait for a reply before abandoning?
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5

## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many
## results do we fetch?
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10

## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search
## results by collection or not?  Use 0 for no, 1 for yes.
CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1

## CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS -- the default number of
## records to display per page in the search results pages.
CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS = 10

## CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS -- in order to limit denial of
## service attacks, the total number of records per group displayed as
## a result of a search query will be limited to this number.  Only
## superuser queries will not be affected by this limit.
CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200

## CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL -- logged-in
## users might have rights to access some restricted collections.
## This variable tweaks the kind of support the system will
## automatically provide to the user with respect to searching in
## these restricted collections.  Set this to 0 in order to require
## the user to explicitly activate restricted collections in order to
## search in them.  Set this to 1 in order to propose to the user the
## list of restricted collections to which he/she has rights (note:
## this is not yet implemented).  Set this to 2 in order to silently
## add all the restricted collections to which the user has rights to
## any query.
## Note: the system will discover which restricted collections a user
## has rights to at login time.  The time complexity of this procedure
## is proportional to the number of restricted collections.  E.g. for
## a system with ~50 restricted collections, you might expect ~1s of
## delay in the login time when this variable is set to a value higher
## than 0.
CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL = 0

## CFG_WEBSEARCH_SHOW_COMMENT_COUNT -- do we want to show the
## 'N comments' links on the search engine pages?  (Useful only when
## you have allowed commenting.)
CFG_WEBSEARCH_SHOW_COMMENT_COUNT = 1

## CFG_WEBSEARCH_SHOW_REVIEW_COUNT -- do we want to show the
## 'N reviews' links on the search engine pages?  (Useful only when
## you have allowed reviewing.)
CFG_WEBSEARCH_SHOW_REVIEW_COUNT = 1

## CFG_WEBSEARCH_FULLTEXT_SNIPPETS -- how many full-text snippets to
## display for full-text searches?
CFG_WEBSEARCH_FULLTEXT_SNIPPETS = 4
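## For example (purely illustrative output), together with
## CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS below, the default settings
## would show up to 4 excerpts per matching record, each of the form:
##   ... decay of the Higgs boson into four ...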
## CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS -- how many context words to
## display around the pattern in each snippet?
CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS = 4

## CFG_WEBSEARCH_WILDCARD_LIMIT -- some queries, wildcard queries in
## particular (e.g. cern*, a*), but also regular expressions
## (e.g. [a-z]+), may take a long time to respond due to the high
## number of hits.  You can limit the number of terms matched by a
## wildcard by setting this variable.  A negative value or zero means
## that no query will be limited (which may be wanted, but is also
## prone to denial-of-service attacks).
CFG_WEBSEARCH_WILDCARD_LIMIT = 50000

## CFG_WEBSEARCH_SYNONYM_KBRS -- defines which knowledge bases are to
## be used for which index in order to provide run-time synonym lookup
## of user-supplied terms, and what massaging function should be
## applied to the search pattern before performing the KB lookup.
## (Can be one of 'exact', 'leading_to_comma', 'leading_to_number'.)
CFG_WEBSEARCH_SYNONYM_KBRS = {
    'journal': ['SEARCH-SYNONYM-JOURNAL', 'leading_to_number'],
}

## CFG_SOLR_URL -- optionally, you may use Solr to serve full-text
## queries.  If so, please specify the URL of your Solr instance.
## (Example: http://localhost:8080/solr)
CFG_SOLR_URL =

#######################################
## Part 4: BibHarvest OAI parameters ##
#######################################

## This part defines parameters for the Invenio OAI gateway.
## Useful if you are running Invenio as an OAI data provider.

## CFG_OAI_ID_FIELD -- OAI identifier MARC field:
CFG_OAI_ID_FIELD = 909COo

## CFG_OAI_SET_FIELD -- OAI set MARC field:
CFG_OAI_SET_FIELD = 909COp

## CFG_OAI_DELETED_POLICY -- OAI deleted records policy
## (no/transient/persistent).
CFG_OAI_DELETED_POLICY = no

## CFG_OAI_ID_PREFIX -- OAI identifier prefix:
CFG_OAI_ID_PREFIX = atlantis.cern.ch

## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier:
CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:CERN-TH-4036

## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify
## verb:
CFG_OAI_IDENTIFY_DESCRIPTION = oai atlantis.cern.ch :
    oai:atlantis.cern.ch:CERN-TH-4036 http://atlantis.cern.ch/
    Free and unlimited use by anybody with obligation to refer to
    original record Full content, i.e. preprints may not be harvested
    by robots Submission restricted. Submitted documents are subject
    of approval by OAI repository admins.

## CFG_OAI_LOAD -- OAI number of records in a response:
CFG_OAI_LOAD = 1000

## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time:
CFG_OAI_EXPIRE = 90000

## CFG_OAI_SLEEP -- service unavailable between two consecutive
## requests for CFG_OAI_SLEEP seconds:
CFG_OAI_SLEEP = 10

##################################
## Part 5: WebSubmit parameters ##
##################################

## This section contains some configuration parameters for the
## WebSubmit module.  Please note that WebSubmit is mostly configured
## at run time via its WebSubmit Admin web interface.  The parameters
## below are the ones that you probably do not want to modify during
## the runtime.

## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext
## documents are stored under "/opt/invenio/var/data/files/gX/Y"
## directories where X is 0,1,... and Y stands for the bibdoc ID.
## Thus documents Y are grouped into directories X, and this variable
## indicates the maximum number of documents Y stored in each
## directory X.  This limit is imposed solely for filesystem
## performance reasons in order not to have too many subdirectories in
## a given directory.
CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000
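## For example (illustrative paths only), with the default limit of
## 5000, the first bibdocs would be stored under
## /opt/invenio/var/data/files/g0/1, .../g0/2, and so on, while a
## later bibdoc such as 5001 would land in .../g1/5001.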
## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated
## list of document extensions not listed in the Python standard
## mimetype library that should be recognized by Invenio.
CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm

## CFG_BIBDOCFILE_USE_XSENDFILE -- if your web server supports the
## X-Sendfile header, you may want to enable this feature in order for
## Invenio to tell the web server to stream files for download (after
## proper authorization checks) by the web server's own means.  This
## helps to liberate Invenio worker processes from being busy with
## sending big files to clients; the web server will take care of
## that.  Note: this feature is still somewhat experimental.  Note:
## when enabled (set to 1), you also have to regenerate the Apache
## vhost conf snippets (inveniocfg --update-config-py
## --create-apache-conf).
CFG_BIBDOCFILE_USE_XSENDFILE = 0

## CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY -- a number between 0 and 1
## indicating the probability with which the MD5 checksum will be
## verified when streaming bibdocfile-managed files.  (0.1 will cause
## the check to be performed on average once for every 10 downloads.)
CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY = 0.1

## CFG_OPENOFFICE_SERVER_HOST -- the host where an OpenOffice server
## is listening.  If set to localhost, an OpenOffice server will be
## started automatically if it is not already running.
## Note: if you set this to an empty value, the usage of OpenOffice
## for converting documents will be disabled.
## If you set this to something different from localhost, you will
## have to take care to have an OpenOffice server running on the
## corresponding host and to install the same OpenOffice release both
## on the client and on the server side.
## In order to launch an OpenOffice server on a remote machine, just
## start the usual 'soffice' executable in this way:
## $> soffice -headless -nologo -nodefault -norestore -nofirststartwizard \
## .. -accept=socket,host=HOST,port=PORT;urp;StarOffice.ComponentContext
CFG_OPENOFFICE_SERVER_HOST = localhost

## CFG_OPENOFFICE_SERVER_PORT -- the port where an OpenOffice server
## is listening.
CFG_OPENOFFICE_SERVER_PORT = 2002

## CFG_OPENOFFICE_USER -- the user that will be used to launch the
## OpenOffice client.  It is recommended to set this to a user who
## does not own files, e.g. 'nobody'.  You should also authorize your
## Apache server user to be able to become this user, e.g. by adding
## to your /etc/sudoers the following line:
## "apache ALL=(nobody) NOPASSWD: ALL"
## provided that apache is the username corresponding to the Apache
## user.  On some machines this might be apache2 or www-data.
CFG_OPENOFFICE_USER = nobody
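## For example (illustrative host name, not a default), to delegate
## all document conversions to a dedicated machine running its own
## OpenOffice server, one could set:
## CFG_OPENOFFICE_SERVER_HOST = conversion.example.org
## CFG_OPENOFFICE_SERVER_PORT = 2002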
#################################
## Part 6: BibIndex parameters ##
#################################

## This section contains some configuration parameters for the
## BibIndex module.  Please note that BibIndex is mostly configured at
## run time via its BibIndex Admin web interface.  The parameters
## below are the ones that you probably do not want to modify very
## often during the runtime.

## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext
## indexing, do you want to index locally stored files only, or also
## external URLs?  Use "0" to say "no" and "1" to say "yes".
CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 0

## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to
## remove stopwords?  Use "0" to say "no" and "1" to say "yes".
CFG_BIBINDEX_REMOVE_STOPWORDS = 0

## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered
## as alphanumeric separators of word-blocks inside words.  You
## probably don't want to change this.
CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~

## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as
## punctuation between word-blocks inside words.  You probably don't
## want to change this.
CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\"

## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML
## markup before indexing?  Use 1 if you have HTML markup inside
## metadata (e.g. in abstracts), use 0 otherwise.
CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0

## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove
## LaTeX markup before indexing?  Use 1 if you have LaTeX markup
## inside metadata (e.g. in abstracts), use 0 otherwise.
CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0

## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be
## added to the index.  Terms shorter than this will be discarded.
## Useful to keep the database clean; however, you can safely leave
## this value at 0 for up to 1,000,000 documents.
CFG_BIBINDEX_MIN_WORD_LENGTH = 0

## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD
## -- access credentials for restricted URLs, interesting only if you
## are fulltext-indexing files located on a remote server that is only
## available via username/password.  But it's probably better to
## handle this case via IP or some convention; the current scheme is
## mostly there for demo only.
CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser
CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass

## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- enable sanity checks for
## integers passed to the intbitset data structures.  It is good to
## enable this during debugging and to disable it for speed
## improvements.
CFG_INTBITSET_ENABLE_SANITY_CHECKS = False

## CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES -- regular expression that
## matches docnames for which OCR is desired (set this to .* in order
## to enable OCR in general, set this to empty in order to disable
## it).
CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES = scan-.*

## CFG_BIBINDEX_SPLASH_PAGES -- key-value mapping where the key
## corresponds to a regular expression that matches the URLs of the
## splash pages of a given service, and the value is a regular
## expression of the set of URLs referenced via tags in the HTML
## content of the splash pages that refer to documents that need to be
## indexed.
## NOTE: for backward-compatibility reasons, you can set this to a
## simple regular expression that will directly be used as the unique
## key of the map, with the corresponding value set to ".*" (in order
## to match any URL).
CFG_BIBINDEX_SPLASH_PAGES = {
    "http://documents\.cern\.ch/setlink\?.*": ".*",
    "http://ilcagenda\.linearcollider\.org/subContributionDisplay\.py\?.*|http://ilcagenda\.linearcollider\.org/contributionDisplay\.py\?.*": "http://ilcagenda\.linearcollider\.org/getFile\.py/access\?.*|http://ilcagenda\.linearcollider\.org/materialDisplay\.py\?.*",
}

## CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES -- do we want
## the author word index to exclude first names to keep only last
## names?
## If set to True, then for the author `Bernard, Denis', only
## `Bernard' will be indexed in the word index, not `Denis'.  Note
## that if you change this variable, you have to re-index the author
## index via `bibindex -w author -R'.
CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES = False

## CFG_BIBINDEX_SYNONYM_KBRS -- defines which knowledge bases are to
## be used for which index in order to provide index-time synonym
## lookup, and what massaging function should be applied to the search
## pattern before performing the KB lookup.  (Can be one of 'exact',
## 'leading_to_comma', 'leading_to_number'.)
CFG_BIBINDEX_SYNONYM_KBRS = {
    'global': ['INDEX-SYNONYM-TITLE', 'exact'],
    'title': ['INDEX-SYNONYM-TITLE', 'exact'],
}

#######################################
## Part 7: Access control parameters ##
#######################################

## This section contains some configuration parameters for the access
## control system.  Please note that WebAccess is mostly configured at
## run time via its WebAccess Admin web interface.  The parameters
## below are the ones that you probably do not want to modify very
## often during the runtime.  (If you do want to modify them during
## runtime, for example to deny access temporarily because of backups,
## you can edit access_control_config.py directly; no need to get back
## here and no need to redo the make process.)

## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is.
## Use 0 for normal operation of the site, 1 for a read-only site (all
## write operations temporarily closed), 2 for a fully closed site,
## and 3 for also disabling any database connection.  Useful for site
## maintenance.
CFG_ACCESS_CONTROL_LEVEL_SITE = 0

## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy.  Use
## 0 to allow guest users, 1 not to allow them (all users must log
## in).
CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0

## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and
## activation policy.  When 0, users can register and accounts are
## automatically activated.  When 1, users can register but the admin
## must activate the accounts.  When 2, users cannot register nor
## update their email address; only the admin can register accounts.
## When 3, users cannot register nor update their email address nor
## password; only the admin can register accounts.  When 4, the same
## as 3 applies, plus the user cannot change his login method.  When
## 5, the same as 4 applies, plus info about how to get an account is
## hidden from the login page.
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0

## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account
## registration to certain email addresses?  If wanted, give the
## domain name below, e.g. "cern.ch".  If not wanted, leave it empty.
CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN =

## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a
## notification email to the administrator when a new account is
## created?  Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a
## notification email to the user when a new account is created, in
## order to verify the validity of the provided email address?  Use 0
## for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a
## notification email to the user when a new account is activated?
## Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0
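## For example (an illustrative policy, not a default), a site where
## all accounts are created by the administrators and anonymous access
## is disabled could combine the settings of this section as follows:
## CFG_ACCESS_CONTROL_LEVEL_GUESTS = 1
## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 3
## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 1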
## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a
## notification email to the user when an account is deleted or an
## account request is rejected?  Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0

## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials
## are stored.  Must be an absolute pathname.  If the value does not
## start with a slash, it is considered to be the filename of a file
## located under the prefix/var/tmp directory.  This is useful for the
## demo site testing purposes.  For the production site, if you plan
## to restrict access to some collections based on the Apache user
## authentication mechanism, you should put here an absolute path to
## your Apache password file.
CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords

## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are
## defined.  See the documentation of the preceding config variable.
CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups

###################################
## Part 8: WebSession parameters ##
###################################

## This section contains some configuration parameters for tweaking
## session handling.

## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a
## session and the corresponding cookie are considered expired.
CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2

## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which
## a session and the corresponding cookie are considered expired when
## the user has requested to permanently stay logged in.
CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365

## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when a user
## requests a password reset, for how many days is the URL valid?
CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3

## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account
## activation email was sent, for how many days is the URL valid?
CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3

## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- if a
## user does not confirm his email address and does not complete the
## registration, after how many days will it expire?
CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10

## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the
## session system allocates the same uid=0 to all guest users
## regardless of where they come from; 0 allocates a unique uid to
## each guest.
CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0

################################
## Part 9: BibRank parameters ##
################################

## This section contains some configuration parameters for the
## ranking system.

## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading
## similarity stats?  ('People who viewed this page also viewed')
CFG_BIBRANK_SHOW_READING_STATS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download
## similarity stats?  ('People who downloaded this document also
## downloaded')
CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show the download
## history graph?  (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we
## want to show a graph representing the distribution of client IPs
## downloading a given document?
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0

## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited
## by' links?
## (Useful only when you have citations in the metadata.)
CFG_BIBRANK_SHOW_CITATION_LINKS = 1

## CFG_BIBRANK_SHOW_CITATION_STATS -- do we want to show citation
## stats?  ('Cited by M records', 'Co-cited with N records')
CFG_BIBRANK_SHOW_CITATION_STATS = 1

## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show the citation
## history graph?  (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1

####################################
## Part 10: WebComment parameters ##
####################################

## This section contains some configuration parameters for the
## commenting and reviewing facilities.

## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users to write
## public comments on records?
CFG_WEBCOMMENT_ALLOW_COMMENTS = 1

## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users to write
## public reviews of records?
CFG_WEBCOMMENT_ALLOW_REVIEWS = 1

## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short
## reviews, that is just the attribution of stars without submitting
## detailed review text?
CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0

## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users
## report a comment as abusive, how many reports does it take before
## the site admin is alerted?
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5

## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do
## we display in the detailed record page upon welcome?
CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1

## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do
## we display in the detailed record page upon welcome?
CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1

## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site
## admin after every comment?
CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1

## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many
## elapsed seconds do we consider enough when checking for possible
## multiple comment submissions by a user?
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20

## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many
## elapsed seconds do we consider enough when checking for possible
## multiple review submissions by a user?
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20

## CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG
## JavaScript-based editor when the user edits comments?
CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False

## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL -- the email address from which
## the alert emails will appear to be sent:
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = info@invenio-software.org

## CFG_WEBCOMMENT_DEFAULT_MODERATOR -- if no rules are specified to
## indicate who is the comment moderator of a collection, this person
## will be used as the default.
CFG_WEBCOMMENT_DEFAULT_MODERATOR = info@invenio-software.org

## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS -- do we want to allow the
## use of the MathJax plugin to render LaTeX input in comments?
CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS = 1

## CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION -- allow comment
## authors to delete their own comments?
CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION = 1

##################################
## Part 11: BibSched parameters ##
##################################

## This section contains some configuration parameters for the
## bibliographic task scheduler.

## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh the
## bibsched monitor?
## (in seconds)
CFG_BIBSCHED_REFRESHTIME = 5

## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task
## logs?
CFG_BIBSCHED_LOG_PAGER = /bin/more

## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to perform
## the garbage collection of the BibSched queue (i.e. removing tasks
## or moving them to the archive).
CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30

## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTasks that can be
## safely removed from the BibSched queue once they are DONE.
CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc

## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be
## safely archived out of the BibSched queue once they are DONE.
CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oaiarchive

## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of
## BibTasks that can run concurrently.
## NOTE: concurrent tasks are still considered an experimental
## feature.  Please keep this value set to 1 on production
## environments.
CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1

## CFG_BIBSCHED_PROCESS_USER -- bibsched and bibtask processes must
## usually run under the same identity as the Apache web server
## process in order to share proper file read/write privileges.  If
## you want to force some other bibsched/bibtask user, e.g. because
## you are using a local `invenio' user that belongs to your
## `www-data' Apache user group and so shares writing rights with your
## Apache web server process in this way, then please set its username
## identity here.  Otherwise we shall check whether your
## bibsched/bibtask processes are run under the same identity as your
## Apache web server process (in which case you can leave the default
## empty value here).
CFG_BIBSCHED_PROCESS_USER =

###################################
## Part 12: WebBasket parameters ##
###################################

## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit on
## the maximum number of displayed baskets.
CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20

## CFG_WEBBASKET_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG
## JavaScript-based editor when the user edits comments in WebBasket?
CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False

##################################
## Part 13: WebAlert parameters ##
##################################

## This section contains some configuration parameters for the
## automatic email notification alert system.

## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the
## alert emails will appear to be sent:
CFG_WEBALERT_ALERT_ENGINE_EMAIL = info@invenio-software.org

## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records
## at most do we send in an outgoing alert email?
CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20

## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of
## characters per line in an outgoing alert email.
CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72

## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert
## emails fails, how many times do we retry?
CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3

## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending
## alert emails fails, what is the sleep time between tries?  (in
## seconds)
CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300

####################################
## Part 14: WebMessage parameters ##
####################################

## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large web messages do we
## allow?
CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000

## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages do we allow
## in a regular user's inbox?
CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30

## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- how many days before
## we delete orphaned messages?
CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60

##################################
## Part 15: MiscUtil parameters ##
##################################

## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool
## in the DB engine of Invenio.  It is okay to enable this flag even
## if you have not installed SQLAlchemy.  Note that Invenio will lose
## some performance if this option is enabled.
CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False

## CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT -- how many queries can we run
## inside run_sql_many() in one SQL statement?  The limit value
## depends on MySQL's max_allowed_packet configuration.
CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT = 10000

## CFG_MISCUTIL_SMTP_HOST -- which server to use as the outgoing mail
## server for sending emails generated by the system, for example
## concerning submissions or email notification alerts.
CFG_MISCUTIL_SMTP_HOST = localhost

## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail
## server defined in the previous step.
CFG_MISCUTIL_SMTP_PORT = 25

## CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT -- the default number of
## seconds after which a process launched through
## shellutils.run_process_with_timeout will be killed.  This is useful
## to catch runaway processes.
CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT = 300

+## CFG_MATHJAX_HOSTING -- if you plan to use MathJax to display TeX
+## formulas on HTML web pages, you can specify whether you wish to use
+## 'local' hosting or 'cdn' hosting of MathJax libraries. (If set to
+## 'local', you have to run 'make install-mathjax-plugin' as described
+## in the INSTALL guide.) If set to 'local', users will use your site
+## to download MathJax sources. If set to 'cdn', users will use
+## centralized MathJax CDN servers instead. Please note that using
+## CDN is suitable only for small institutions or for MathJax
+## sponsors; see the MathJax website for more details. (Also, please
+## note that if you plan to use MathJax on your site, you have to
+## adapt CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and
+## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS configuration variables
+## elsewhere in this file.)
+CFG_MATHJAX_HOSTING = local
+

#################################
## Part 16: BibEdit parameters ##
#################################

## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is
## locked to prevent other users from editing it at the same time.
## How many seconds of inactivity before the locked record becomes
## free again for other people to edit?
CFG_BIBEDIT_TIMEOUT = 3600

## CFG_BIBEDIT_LOCKLEVEL -- when a user tries to edit a record for
## which there is a pending bibupload task in the queue, this should
## not be permitted.  The lock level determines how thoroughly the
## queue should be investigated to determine if this is the case.
## Level 0 - always permits editing, doesn't look at the queue
##           (unsafe, use only if you know what you are doing)
## Level 1 - permits editing if there are no queued bibedit tasks for
##           this record (safe with respect to bibedit, but not for
##           other bibupload maintenance jobs)
## Level 2 - permits editing if there are no queued bibupload tasks of
##           any sort (safe, but may lock more than necessary if many
##           cataloguers are around)
## Level 3 - permits editing if no queued bibupload task concerns the
##           given record (safe, most precise locking, but slow;
##           checks for 001/EXTERNAL_SYSNO_TAG/EXTERNAL_OAIID_TAG)
## The recommended level is 3 (default) or 2 (if you use maintenance
## jobs often).
CFG_BIBEDIT_LOCKLEVEL = 3

## CFG_BIBEDIT_PROTECTED_FIELDS -- a comma-separated list of fields
## that BibEdit will not allow to be added, edited or deleted.
## Wildcards are not supported, but conceptually a wildcard is added
## at the end of every field specification.  Examples:
## 500A   - protect all MARC fields with tag 500 and first indicator A
## 5      - protect all MARC fields in the 500-series
## 909C_a - protect subfield a in tag 909 with first indicator C and
##          empty second indicator
## Note that 001 is protected by default, but if protection of other
## identifiers or automated fields is a requirement, they should be
## added to this list.
CFG_BIBEDIT_PROTECTED_FIELDS =

## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING -- maximum number of
## records that can be modified instantly using the multi-record
## editor.  Above this limit, modifications will only be executed in
## limited hours.
CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING = 2000

## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING -- maximum number of
## records that can be sent for modification without having a
## superadmin role.  If the number of records is between
## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING and this number, the
## modifications will take place only in limited hours.
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING = 20000

## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME -- allowed time to
## execute modifications on records when the number exceeds
## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING.
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME = 22:00-05:00

###################################
## Part 17: BibUpload parameters ##
###################################

## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references?
CFG_BIBUPLOAD_REFERENCE_TAG = 999

## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external
## system numbers?  Useful for matching when our records come from an
## external digital library system.
CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a

## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store the OAI ID
## tags of harvested records?  Useful for matching when we harvest
## stuff via OAI that we do not want to re-export via Invenio OAI;
## such records may have only the source OAI ID stored in this tag
## (kind of like an external system number too).
CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a

## CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG -- where do we store
## the OAI SRC tags of harvested records?  Useful for matching when we
## harvest stuff via OAI that we do not want to re-export via Invenio
## OAI; such records may have only the source OAI SRC stored in this
## tag (kind of like an external system number too).  Note that this
## should be the same field as CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG.
CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = 035__9
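## For example (an illustrative MARC snippet, not from a real record),
## with the two settings above a record harvested from an external OAI
## repository could carry both the source OAI ID and its provenance in
## the same 035 field:
##   035__ $$9 theserver.example.org $$a oai:theserver.example.org:1234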
## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that
## are strong enough to resist the replace mode.  Useful for tags that
## might be created from an external non-metadata-like source,
## e.g. the information about the number of copies left.
CFG_BIBUPLOAD_STRONG_TAGS = 964

## CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -- a comma-separated list
## of tags that contain provenance information that should be checked
## in the bibupload correct mode via matching provenance codes.  (Only
## field instances of the same provenance information would be acted
## upon.)  Please specify the whole tag info up to subfield codes.
CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = 6531_9

## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of
## system paths from which it is allowed to take fulltexts that will
## be uploaded via FFT (CFG_TMPDIR is included by default).
CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home

## CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE -- do we want to serialize
## the internal representation of records (Pythonic record structure)
## into the database?  This can improve the internal processing speed
## of some operations at the price of somewhat bigger disk space
## usage.  If you change this value after some records have already
## been added to your installation, you may want to run:
## $ /opt/invenio/bin/inveniocfg --reset-recstruct-cache
## in order to either erase the cache, thus freeing database space, or
## to fill the cache for all records that have not been cached yet.
CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1

## CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY -- a comma-separated
## list indicating which fields match the file names of the documents
## to be uploaded.  The matching will be done in the same order as the
## list provided.
CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY = reportnumber,recid

## CFG_BATCHUPLOADER_DAEMON_DIR -- directory where the batchuploader
## daemon will look for the subfolders metadata and document by
## default.  If the path is relative, CFG_PREFIX will be joined as a
## prefix.
CFG_BATCHUPLOADER_DAEMON_DIR = var/batchupload

## CFG_BATCHUPLOADER_WEB_ROBOT_AGENT -- comma-separated list
## specifying the agents permitted when calling the batch uploader web
## interface cdsweb.cern.ch/batchuploader/robotupload, e.g. when using
## curl: curl xxx -A invenio_webupload
CFG_BATCHUPLOADER_WEB_ROBOT_AGENT = invenio_webupload

## CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS -- access list specifying, for
## each IP address, which collections are allowed using the batch
## uploader robot interface.
CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = {
    '10.0.0.1': ['BOOK', 'REPORT'], # Example 1
    '10.0.0.2': ['POETRY', 'PREPRINT'], # Example 2
}

####################################
## Part 18: BibCatalog parameters ##
####################################

## EXPERIMENTAL: Please do not use.
CFG_BIBCATALOG_SYSTEM =
CFG_BIBCATALOG_SYSTEM_RT_CLI = /usr/bin/rt
CFG_BIBCATALOG_SYSTEM_RT_URL = http://localhost/rt3
CFG_BIBCATALOG_QUEUES = General

####################################
## Part 19: BibFormat parameters  ##
####################################

## CFG_BIBFORMAT_HIDDEN_TAGS -- comma-separated list of MARC tags that
## are not shown to users not having cataloging authorizations.
CFG_BIBFORMAT_HIDDEN_TAGS = 595

####################################
## Part 20: BibMatch parameters   ##
####################################

## CFG_BIBMATCH_LOCAL_SLEEPTIME -- determines the number of seconds to
## sleep between search queries on the LOCAL system.
CFG_BIBMATCH_LOCAL_SLEEPTIME = 0.0

## CFG_BIBMATCH_REMOTE_SLEEPTIME -- determines the number of seconds
## to sleep between search queries on REMOTE systems.
CFG_BIBMATCH_REMOTE_SLEEPTIME = 2.0

## CFG_BIBMATCH_FUZZY_WORDLIMITS -- determines the number of words to
## extract from a given field's value during fuzzy matching mode.  Add
## or change fields and their word limits in the dictionary to
## configure this.
CFG_BIBMATCH_FUZZY_WORDLIMITS = {
    '100__a': 2,
    '245__a': 4
}

## CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT -- determines the number of
## empty results to accept during fuzzy matching mode.
CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT = 1

## CFG_BIBMATCH_QUERY_TEMPLATES -- here you can set the various
## predefined querystrings used to standardize common matching
## queries.  By default the following templates are given:
## title        - standard title search.  Taken from 245__a (default)
## title-author - title and author search (i.e. title AND author).
##                Taken from 245__a and 100__a
## reportnumber - report number search (i.e. reportnumber:REP-NO-123).
CFG_BIBMATCH_QUERY_TEMPLATES = {
    'title' : '[title]',
    'title-author' : '[title] [author]',
    'reportnumber' : 'reportnumber:[reportnumber]'
}

######################################
## Part 21: BibAuthorID parameters  ##
######################################

# CFG_BIBAUTHORID_MAX_PROCESSES is the max number of processes
# that may be spawned by the disambiguation algorithm.
CFG_BIBAUTHORID_MAX_PROCESSES = 4

# CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS is the max number of
# threads used to parallelize SQL queries during personID table
# updates.
CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS = 4

# CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA is the minimum
# confidence needed when backtracking automatically disambiguated
# authors to persons.  Values in [0,1].
CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA = 0.5

# CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA is the threshold for the
# confidence assigned to a paper by the disambiguation algorithm to
# have the paper automatically connected to a personID.  Papers below
# the threshold are left disconnected from persons if not already
# connected in other ways.  Values in [0,1].
CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA = 0.5

# CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH is the minimum
# compatibility threshold between disambiguated authors and persons:
# if less compatible than this, the update process will create a new
# person to associate with the found disambiguated author.
CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH = 0.5

# CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N is a fallback
# mechanism to force a merge if a certain percentage of papers is
# compatible, no matter what the confidences on the automatically
# disambiguated author look like.
CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N = 0.5

# CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY defines the user info
# keys for externally claimed records in a remote-login scenario,
# e.g. "external_arxivids" for arXiv SSO.
CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY =

# CFG_BIBAUTHORID_ATTACH_VA_TO_MULTIPLE_RAS determines whether the
# authorid algorithm is allowed to attach a virtual author to multiple
# real authors in the last run of the orphan processing.
CFG_BIBAUTHORID_ATTACH_VA_TO_MULTIPLE_RAS = False

# CFG_BIBAUTHORID_ENABLED globally enables the AuthorID interfaces.
# If False, no guest, user or operator will have access to the system.
CFG_BIBAUTHORID_ENABLED = True
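# For example (illustrative values, not defaults), a cautious setup
# that only auto-attaches papers the disambiguation algorithm is very
# confident about could raise both confidence thresholds above:
# CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA = 0.8
# CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA = 0.8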
# CFG_BIBAUTHORID_ON_AUTHORPAGES enables AuthorID information on the
# author pages.
CFG_BIBAUTHORID_ON_AUTHORPAGES = True

# CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL defines the email address
# to which all ticket requests concerning authors will be sent.
CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL = info@invenio-software.org

##########################
##  THAT's ALL, FOLKS!  ##
##########################

diff --git a/modules/bibedit/lib/bibedit_engine.py b/modules/bibedit/lib/bibedit_engine.py
index 14e69ed43..3f1ffbd86 100644
--- a/modules/bibedit/lib/bibedit_engine.py
+++ b/modules/bibedit/lib/bibedit_engine.py
@@ -1,1202 +1,1203 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

# pylint: disable=C0103
"""Invenio BibEdit Engine."""

__revision__ = "$Id"

from invenio import bibrecord
from invenio import bibformat

from invenio.bibedit_config import CFG_BIBEDIT_AJAX_RESULT_CODES, \
    CFG_BIBEDIT_JS_CHECK_SCROLL_INTERVAL, CFG_BIBEDIT_JS_HASH_CHECK_INTERVAL, \
    CFG_BIBEDIT_JS_CLONED_RECORD_COLOR, \
    CFG_BIBEDIT_JS_CLONED_RECORD_COLOR_FADE_DURATION, \
    CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR, \
    CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR_FADE_DURATION, \
    CFG_BIBEDIT_JS_NEW_CONTENT_COLOR, \
    CFG_BIBEDIT_JS_NEW_CONTENT_COLOR_FADE_DURATION, \
    CFG_BIBEDIT_JS_NEW_CONTENT_HIGHLIGHT_DELAY, \
    CFG_BIBEDIT_JS_STATUS_ERROR_TIME, CFG_BIBEDIT_JS_STATUS_INFO_TIME, \
    CFG_BIBEDIT_JS_TICKET_REFRESH_DELAY, CFG_BIBEDIT_MAX_SEARCH_RESULTS, \
    CFG_BIBEDIT_TAG_FORMAT, CFG_BIBEDIT_AJAX_RESULT_CODES_REV, \
    CFG_BIBEDIT_AUTOSUGGEST_TAGS, CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS, \
    CFG_BIBEDIT_KEYWORD_TAXONOMY, CFG_BIBEDIT_KEYWORD_TAG, \
    CFG_BIBEDIT_KEYWORD_RDFLABEL

from invenio.config import CFG_SITE_LANG, CFG_DEVEL_SITE
from invenio.bibedit_dblayer import get_name_tags_all, reserve_record_id, \
    get_related_hp_changesets, get_hp_update_xml, delete_hp_change, \
    get_record_last_modification_date, get_record_revision_author, \
    get_marcxml_of_record_revision, delete_related_holdingpen_changes, \
    get_record_revisions

from invenio.bibedit_utils import cache_exists, cache_expired, \
    create_cache_file, delete_cache_file, get_bibrecord, \
    get_cache_file_contents, get_cache_mtime, get_record_templates, \
    get_record_template, latest_record_revision, record_locked_by_other_user, \
    record_locked_by_queue, save_xml_record, touch_cache_file, \
    update_cache_file_contents, get_field_templates, get_marcxml_of_revision, \
    revision_to_timestamp, timestamp_to_revision, \
    get_record_revision_timestamps, record_revision_exists, \
    can_record_have_physical_copies

from invenio.bibrecord import create_record, print_rec, record_add_field, \
    record_add_subfield_into, record_delete_field, \
    record_delete_subfield_from, \
    record_modify_subfield, record_move_subfield, \
    create_field, record_replace_field, \
    record_move_fields, \
    record_modify_controlfield, record_get_field_values, \
    record_get_subfields
from invenio.config import CFG_BIBEDIT_PROTECTED_FIELDS, CFG_CERN_SITE, \
    CFG_SITE_URL
from invenio.search_engine import record_exists, search_pattern
from invenio.webuser import session_param_get, session_param_set
from invenio.bibcatalog import bibcatalog_system
from invenio.webpage import page
+from invenio.htmlutils import get_mathjax_header
from invenio.bibknowledge import get_kbd_values_for_bibedit, get_kbr_values, \
    get_kbt_items_for_bibedit #autosuggest
from invenio.bibcirculation_dblayer import get_number_copies, has_copies
from invenio.bibcirculation_utils import create_item_details_url
from datetime import datetime

import re
import difflib
import zlib
import sys

if sys.hexversion < 0x2060000:
    try:
        import simplejson as json
        simplejson_available = True
    except ImportError:
        # Okay, no Ajax app will be possible, but continue anyway,
        # since this package is only recommended, not mandatory.
        simplejson_available = False
else:
    import json
    simplejson_available = True

import invenio.template
bibedit_templates = invenio.template.load('bibedit')

re_revdate_split = re.compile('^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)')

def get_empty_fields_templates():
    """Return the templates of empty fields:
    - an empty data field
    - an empty control field
    """
    return [{
        "name": "Empty field",
        "description": "The data field not containing any " + \
                       "information filled in",
        "tag" : "",
        "ind1" : "",
        "ind2" : "",
        "subfields" : [("","")],
        "isControlfield" : False
    },{
        "name" : "Empty control field",
        "description" : "The controlfield not containing any " + \
                        "data or tag description",
        "isControlfield" : True,
        "tag" : "",
        "value" : ""
    }]

def get_available_fields_templates():
    """Return all the available field templates as a list of
    descriptors.  Each descriptor has the same structure as a full
    field descriptor inside the record."""
    templates = get_field_templates()
    result = get_empty_fields_templates()
    for template in templates:
        tplTag = template[3].keys()[0]
        field = template[3][tplTag][0]
        if (field[0] == []):
            # if the field is a controlfield, add a different structure
            result.append({
                "name" : template[1],
                "description" : template[2],
                "isControlfield" : True,
                "tag" : tplTag,
                "value" : field[3]
            })
        else:
            result.append({
                "name": template[1],
                "description": template[2],
                "tag" : tplTag,
                "ind1" : field[1],
                "ind2" : field[2],
                "subfields" : field[0],
                "isControlfield" : False
            })
    return result

def perform_request_init(uid, ln, req, lastupdated):
    """Handle the initial request by adding menu and JavaScript to the
    page."""
    errors = []
    warnings = []
    body = ''

    # Add script data.
    record_templates = get_record_templates()
    record_templates.sort()
    tag_names = get_name_tags_all()
    protected_fields = ['001']
    protected_fields.extend(CFG_BIBEDIT_PROTECTED_FIELDS.split(','))
    history_url = '"' + CFG_SITE_URL + '/admin/bibedit/bibeditadmin.py/history"'
    cern_site = 'false'

    if not simplejson_available:
        title = 'Record Editor'
        body = '''Sorry, the record editor cannot operate when the
`simplejson' module is not installed.
Please see the INSTALL file.'''
        return page(title       = title,
                    body        = body,
                    errors      = [],
                    warnings    = [],
                    uid         = uid,
                    language    = ln,
                    navtrail    = "",
                    lastupdated = lastupdated,
                    req         = req)

    if CFG_CERN_SITE:
        cern_site = 'true'
    data = {'gRECORD_TEMPLATES': record_templates,
            'gTAG_NAMES': tag_names,
            'gPROTECTED_FIELDS': protected_fields,
            'gSITE_URL': '"' + CFG_SITE_URL + '"',
            'gHISTORY_URL': history_url,
            'gCERN_SITE': cern_site,
            'gHASH_CHECK_INTERVAL': CFG_BIBEDIT_JS_HASH_CHECK_INTERVAL,
            'gCHECK_SCROLL_INTERVAL': CFG_BIBEDIT_JS_CHECK_SCROLL_INTERVAL,
            'gSTATUS_ERROR_TIME': CFG_BIBEDIT_JS_STATUS_ERROR_TIME,
            'gSTATUS_INFO_TIME': CFG_BIBEDIT_JS_STATUS_INFO_TIME,
            'gCLONED_RECORD_COLOR':
                '"' + CFG_BIBEDIT_JS_CLONED_RECORD_COLOR + '"',
            'gCLONED_RECORD_COLOR_FADE_DURATION':
                CFG_BIBEDIT_JS_CLONED_RECORD_COLOR_FADE_DURATION,
            'gNEW_ADD_FIELD_FORM_COLOR':
                '"' + CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR + '"',
            'gNEW_ADD_FIELD_FORM_COLOR_FADE_DURATION':
                CFG_BIBEDIT_JS_NEW_ADD_FIELD_FORM_COLOR_FADE_DURATION,
            'gNEW_CONTENT_COLOR': '"' + CFG_BIBEDIT_JS_NEW_CONTENT_COLOR + '"',
            'gNEW_CONTENT_COLOR_FADE_DURATION':
                CFG_BIBEDIT_JS_NEW_CONTENT_COLOR_FADE_DURATION,
            'gNEW_CONTENT_HIGHLIGHT_DELAY':
                CFG_BIBEDIT_JS_NEW_CONTENT_HIGHLIGHT_DELAY,
            'gTICKET_REFRESH_DELAY': CFG_BIBEDIT_JS_TICKET_REFRESH_DELAY,
            'gRESULT_CODES': CFG_BIBEDIT_AJAX_RESULT_CODES,
            'gAUTOSUGGEST_TAGS' : CFG_BIBEDIT_AUTOSUGGEST_TAGS,
            'gAUTOCOMPLETE_TAGS' : CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS.keys(),
            'gKEYWORD_TAG' : '"' + CFG_BIBEDIT_KEYWORD_TAG + '"'
            }
    body += '\n'

    # Adding the information about field templates
    fieldTemplates = get_available_fields_templates()
    body += "\n"

    # Add scripts (the ordering is NOT irrelevant).
    scripts = ['jquery.min.js', 'jquery.effects.core.min.js',
               'jquery.effects.highlight.min.js', 'jquery.autogrow.js',
               'jquery.jeditable.mini.js', 'jquery.hotkeys.min.js',
               'json2.js', 'bibedit_display.js', 'bibedit_engine.js',
               'bibedit_keys.js', 'bibedit_menu.js', 'bibedit_holdingpen.js',
               'marcxml.js', 'bibedit_clipboard.js']
    for script in scripts:
        body += '    <script type="text/javascript" src="%s/js/%s"></script>\n' % (CFG_SITE_URL, script)

    # Build page structure and menu.
    # rec = create_record(format_record(235, "xm"))[0]
    #oaiId = record_extract_oai_id(rec)
    body += bibedit_templates.menu()
    body += '    <div id="bibEditContent"></div>\n'
    return body, errors, warnings

def get_xml_comparison(header1, header2, xml1, xml2):
    """Return diffs of two MARCXML records."""
    return "".join(difflib.unified_diff(xml1.splitlines(1),
                                        xml2.splitlines(1), header1, header2))

def get_marcxml_of_revision_id(recid, revid):
    """Return the MARCXML string corresponding to revision REVID
    (=RECID.REVDATE) of a record.  Return the empty string if the
    revision does not exist.
    """
    res = ""
    job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revid).groups()
    tmp_res = get_marcxml_of_record_revision(recid, job_date)
    if tmp_res:
        for row in tmp_res:
            res += zlib.decompress(row[0]) + "\n"
    return res

def perform_request_compare(ln, recid, rev1, rev2):
    """Handle a request for comparing two records."""
    body = ""
    errors = []
    warnings = []
    if (not record_revision_exists(recid, rev1)) or \
       (not record_revision_exists(recid, rev2)):
        body = "The requested record revision does not exist!"
    else:
        xml1 = get_marcxml_of_revision_id(recid, rev1)
        xml2 = get_marcxml_of_revision_id(recid, rev2)
        fullrevid1 = "%i.%s" % (recid, rev1)
        fullrevid2 = "%i.%s" % (recid, rev2)
        comparison = bibedit_templates.clean_value(
            get_xml_comparison(fullrevid1, fullrevid2, xml1, xml2),
            'text').replace('\n', '<br/>\n ')
        job_date1 = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(rev1).groups()
        job_date2 = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(rev2).groups()
        body += bibedit_templates.history_comparebox(ln, job_date1,
                                                     job_date2, comparison)
    return body, errors, warnings

def perform_request_newticket(recid, uid):
    """Create a new ticket with this record's number.
    @param recid: record id
    @param uid: user id
    @return: (error_msg, url)
    """
    t_id = bibcatalog_system.ticket_submit(uid, "", recid, "")
    t_url = ""
    errmsg = ""
    if t_id:
        # get the ticket's URL
        t_url = bibcatalog_system.ticket_get_attribute(uid, t_id,
                                                       'url_modify')
    else:
        errmsg = "ticket_submit failed"
    return (errmsg, t_url)

def perform_request_ajax(req, recid, uid, data, isBulk = False, \
                         ln = CFG_SITE_LANG):
    """Handle Ajax requests by redirecting to the appropriate
    function."""
    response = {}
    request_type = data['requestType']
    undo_redo = None
    if data.has_key("undoRedo"):
        undo_redo = data["undoRedo"]
    # Call function based on request type.
    if request_type == 'searchForRecord':
        # Search request.
        response.update(perform_request_search(data))
    elif request_type in ['changeTagFormat']:
        # User related requests.
        response.update(perform_request_user(req, request_type, recid, data))
    elif request_type in ('getRecord', 'submit', 'cancel', 'newRecord',
                          'deleteRecord', 'deleteRecordCache',
                          'prepareRecordMerge', 'revert'):
        # 'Major' record related requests.
        response.update(perform_request_record(req, request_type, recid,
                                               uid, data))
    elif request_type in ('addField', 'addSubfields', \
                          'addFieldsSubfieldsOnPositions', 'modifyContent', \
                          'modifySubfieldTag', 'modifyFieldTag', \
                          'moveSubfield', 'deleteFields', 'moveField', \
                          'modifyField', 'otherUpdateRequest', \
                          'disableHpChange', 'deactivateHoldingPenChangeset'):
        # Record updates.
        cacheMTime = data['cacheMTime']
        if data.has_key('hpChanges'):
            hpChanges = data['hpChanges']
        else:
            hpChanges = {}
        response.update(perform_request_update_record(request_type, recid, \
                                                      uid, cacheMTime, data, \
                                                      hpChanges, undo_redo, \
                                                      isBulk, ln))
    elif request_type in ('autosuggest', 'autocomplete', 'autokeyword'):
        response.update(perform_request_autocomplete(request_type, recid, uid, \
                                                     data))
    elif request_type in ('getTickets', ):
        # BibCatalog requests.
        response.update(perform_request_bibcatalog(request_type, recid, uid))
    elif request_type in ('getHoldingPenUpdates', ):
        response.update(perform_request_holdingpen(request_type, recid))
    elif request_type in ('getHoldingPenUpdateDetails', \
                          'deleteHoldingPenChangeset'):
        updateId = data['changesetNumber']
        response.update(perform_request_holdingpen(request_type, recid, \
                                                   updateId))
    elif request_type in ('applyBulkUpdates', ):
        # a general version of a bulk request
        changes = data['requestsData']
        cacheMTime = data['cacheMTime']
        response.update(perform_bulk_request_ajax(req, recid, uid, changes, \
                                                  undo_redo, cacheMTime))
    elif request_type in ('preview', ):
        response.update(perform_request_preview_record(request_type, recid,
                                                       uid))
    return response

def perform_bulk_request_ajax(req, recid, uid, reqsData, undoRedo, cacheMTime):
    """An AJAX handler used when treating bulk updates."""
    lastResult = {}
    lastTime = cacheMTime
    isFirst = True
    for data in reqsData:
        assert data != None
        data['cacheMTime'] = lastTime
        if isFirst and undoRedo != None:
            # we add the undo/redo handler to the first operation in
            # order to save the handler on the server side!
            data['undoRedo'] = undoRedo
            isFirst = False
        lastResult = perform_request_ajax(req, recid, uid, data, True)
        # now we have to update the cacheMTime for the next request!
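        # Chaining the modification time is what keeps the bulk request
        # consistent: each sub-request is validated against the cache
        # mtime produced by the previous one before being applied.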
# if lastResult.has_key('cacheMTime'): try: lastTime = lastResult['cacheMTime'] except: raise Exception(str(lastResult)) return lastResult def perform_request_search(data): """Handle search requests.""" response = {} searchType = data['searchType'] if searchType is None: searchType = "anywhere" searchPattern = data['searchPattern'] if searchType == 'anywhere': pattern = searchPattern else: pattern = searchType + ':' + searchPattern result_set = list(search_pattern(p=pattern)) response['resultCode'] = 1 response['resultSet'] = result_set[0:CFG_BIBEDIT_MAX_SEARCH_RESULTS] return response def perform_request_user(req, request_type, recid, data): """Handle user related requests.""" response = {} if request_type == 'changeTagFormat': try: tagformat_settings = session_param_get(req, 'bibedit_tagformat') except KeyError: tagformat_settings = {} tagformat_settings[recid] = data['tagFormat'] session_param_set(req, 'bibedit_tagformat', tagformat_settings) response['resultCode'] = 2 return response def perform_request_holdingpen(request_type, recId, changeId=None): """ A method performing the holdingPen ajax request. The following types of requests can be made: getHoldingPenUpdates - retrieving the holding pen updates pending for a given record """ response = {} if request_type == 'getHoldingPenUpdates': changeSet = get_related_hp_changesets(recId) changes = [] for change in changeSet: changes.append((str(change[0]), str(change[1]))) response["changes"] = changes elif request_type == 'getHoldingPenUpdateDetails': # returning the list of changes related to the holding pen update # the format based on what the record difference xtool returns assert(changeId != None) hpContent = get_hp_update_xml(changeId) holdingPenRecord = create_record(hpContent[0], "xm")[0] # databaseRecord = get_record(hpContent[1]) response['record'] = holdingPenRecord response['changeset_number'] = changeId elif request_type == 'deleteHoldingPenChangeset': assert(changeId != None) delete_hp_change(changeId) return response def perform_request_record(req, request_type, recid, uid, data, ln=CFG_SITE_LANG): """Handle 'major' record related requests like fetching, submitting or deleting a record, cancel editing or preparing a record for merging. """ response = {} if request_type == 'newRecord': # Create a new record. new_recid = reserve_record_id() new_type = data['newType'] if new_type == 'empty': # Create a new empty record. create_cache_file(recid, uid) response['resultCode'], response['newRecID'] = 6, new_recid elif new_type == 'template': # Create a new record from XML record template. template_filename = data['templateFilename'] template = get_record_template(template_filename) if not template: response['resultCode'] = 108 else: record = create_record(template)[0] if not record: response['resultCode'] = 109 else: record_add_field(record, '001', controlfield_value=str(new_recid)) create_cache_file(new_recid, uid, record, True) response['resultCode'], response['newRecID'] = 7, new_recid elif new_type == 'clone': # Clone an existing record (from the users cache). existing_cache = cache_exists(recid, uid) if existing_cache: try: record = get_cache_file_contents(recid, uid)[2] except: # if, for example, the cache format was wrong (outdated) record = get_bibrecord(recid) else: # Cache missing. Fall back to using original version. 
record = get_bibrecord(recid) record_delete_field(record, '001') record_add_field(record, '001', controlfield_value=str(new_recid)) create_cache_file(new_recid, uid, record, True) response['resultCode'], response['newRecID'] = 8, new_recid elif request_type == 'getRecord': # Fetch the record. Possible error situations: # - Non-existing record # - Deleted record # - Record locked by other user # - Record locked by queue # A cache file will be created if it does not exist. # If the cache is outdated (i.e., not based on the latest DB revision), # cacheOutdated will be set to True in the response. record_status = record_exists(recid) existing_cache = cache_exists(recid, uid) read_only_mode = False if data.has_key("inReadOnlyMode"): read_only_mode = data['inReadOnlyMode'] if record_status == 0: response['resultCode'] = 102 elif record_status == -1: response['resultCode'] = 103 elif not read_only_mode and not existing_cache and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif not read_only_mode and existing_cache and \ cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif not read_only_mode and record_locked_by_queue(recid): response['resultCode'] = 105 else: if data.get('deleteRecordCache'): delete_cache_file(recid, uid) existing_cache = False pending_changes = [] disabled_hp_changes = {} if read_only_mode: if data.has_key('recordRevision'): record_revision_ts = data['recordRevision'] record_xml = get_marcxml_of_revision(recid, \ record_revision_ts) record = create_record(record_xml)[0] record_revision = timestamp_to_revision(record_revision_ts) pending_changes = [] disabled_hp_changes = {} else: # a normal cacheless retrieval of a record record = get_bibrecord(recid) record_revision = get_record_last_modification_date(recid) if record_revision == None: record_revision = datetime.now().timetuple() pending_changes = [] disabled_hp_changes = {} cache_dirty = False mtime = 0 undo_list = [] redo_list = [] elif not existing_cache: record_revision, record = create_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) pending_changes = [] disabled_hp_changes = {} undo_list = [] redo_list = [] cache_dirty = False else: #TODO: This try except should be replaced with something nicer, # like an argument indicating if a new cache file is to # be created try: cache_dirty, record_revision, record, pending_changes, \ disabled_hp_changes, undo_list, redo_list = \ get_cache_file_contents(recid, uid) touch_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) if not latest_record_revision(recid, record_revision) and \ get_record_revisions(recid) != (): # This sould prevent from using old cache in case of # viewing old version. 
If there are no revisions, # it means we should skip this step because this # is a new record response['cacheOutdated'] = True except: record_revision, record = create_cache_file(recid, uid) mtime = get_cache_mtime(recid, uid) pending_changes = [] disabled_hp_changes = {} cache_dirty = False undo_list = [] redo_list = [] if data.get('clonedRecord',''): response['resultCode'] = 9 else: response['resultCode'] = 3 revision_author = get_record_revision_author(recid, record_revision) latest_revision = get_record_last_modification_date(recid) if latest_revision == None: latest_revision = datetime.now().timetuple() last_revision_ts = revision_to_timestamp(latest_revision) revisions_history = get_record_revision_timestamps(recid) number_of_physical_copies = get_number_copies(recid) bibcirc_details_URL = create_item_details_url(recid, ln) can_have_copies = can_record_have_physical_copies(recid) response['cacheDirty'], response['record'], \ response['cacheMTime'], response['recordRevision'], \ response['revisionAuthor'], response['lastRevision'], \ response['revisionsHistory'], response['inReadOnlyMode'], \ response['pendingHpChanges'], response['disabledHpChanges'], \ response['undoList'], response['redoList'] = cache_dirty, \ record, mtime, revision_to_timestamp(record_revision), \ revision_author, last_revision_ts, revisions_history, \ read_only_mode, pending_changes, disabled_hp_changes, \ undo_list, redo_list response['numberOfCopies'] = number_of_physical_copies response['bibCirculationUrl'] = bibcirc_details_URL response['canRecordHavePhysicalCopies'] = can_have_copies # Set tag format from user's session settings. try: tagformat_settings = session_param_get(req, 'bibedit_tagformat') tagformat = tagformat_settings[recid] except KeyError: tagformat = CFG_BIBEDIT_TAG_FORMAT response['tagFormat'] = tagformat elif request_type == 'submit': # Submit the record. Possible error situations: # - Missing cache file # - Cache file modified in other editor # - Record locked by other user # - Record locked by queue # - Invalid XML characters # If the cache is outdated cacheOutdated will be set to True in the # response. 
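# [Editorial sketch, not part of the patch] The 'submit' branch below uses
# the same optimistic-locking test as 'cancel': act only if the
# client-supplied mtime still matches the cache file on disk. In
# isolation (get_mtime and do_submit are hypothetical stand-ins):
#
#   def submit_if_unmodified(client_mtime, get_mtime, do_submit):
#       if get_mtime() != client_mtime:
#           return {'resultCode': 107}  # cache modified in another editor
#       return do_submit()              # returns {'resultCode': 4}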
if not cache_exists(recid, uid): response['resultCode'] = 106 elif not get_cache_mtime(recid, uid) == data['cacheMTime']: response['resultCode'] = 107 elif cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif record_locked_by_queue(recid): response['resultCode'] = 105 else: try: tmp_result = get_cache_file_contents(recid, uid) record_revision = tmp_result[1] record = tmp_result[2] pending_changes = tmp_result[3] # disabled_changes = tmp_result[4] xml_record = print_rec(record) record, status_code, list_of_errors = create_record(xml_record) if status_code == 0: response['resultCode'], response['errors'] = 110, \ list_of_errors elif not data['force'] and \ not latest_record_revision(recid, record_revision): response['cacheOutdated'] = True if CFG_DEVEL_SITE: response['record_revision'] = record_revision.__str__() response['newest_record_revision'] = \ get_record_last_modification_date(recid).__str__() else: save_xml_record(recid, uid) response['resultCode'] = 4 except Exception, e: response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'error_wrong_cache_file_format'] if CFG_DEVEL_SITE: # return debug information in the response response['exception_message'] = e.__str__() elif request_type == 'revert': revId = data['revId'] job_date = "%s-%s-%s %s:%s:%s" % re_revdate_split.search(revId).groups() revision_xml = get_marcxml_of_revision(recid, job_date) save_xml_record(recid, uid, revision_xml) if cache_exists(recid, uid): delete_cache_file(recid, uid) response['resultCode'] = 4 elif request_type == 'cancel': # Cancel editing by deleting the cache file. Possible error situations: # - Cache file modified in other editor if cache_exists(recid, uid): if get_cache_mtime(recid, uid) == data['cacheMTime']: delete_cache_file(recid, uid) response['resultCode'] = 5 else: response['resultCode'] = 107 else: response['resultCode'] = 5
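# [Editorial sketch, not part of the patch] Deletion below is "soft": the
# record is marked with a 980__c DELETED subfield and re-uploaded, rather
# than being removed from the database. The marking step in isolation,
# using the same bibrecord calls as the handler (the empty-record XML is
# illustrative):
#
#   from invenio.bibrecord import create_record, record_add_field
#   record = create_record("<record></record>")[0]
#   record_add_field(record, '980', ' ', ' ', '', [('c', 'DELETED')])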
elif request_type == 'deleteRecord': # Delete the record. Possible error situations: # - Record has physical copies attached # - Record locked by other user # - Record locked by queue # As the user is requesting deletion we proceed even if the cache file # is missing, and we don't check whether the cache is outdated or has # been modified in another editor. existing_cache = cache_exists(recid, uid) pending_changes = [] if has_copies(recid): response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_physical_copies_exist'] elif existing_cache and cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_rec_locked_by_user'] elif record_locked_by_queue(recid): response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['error_rec_locked_by_queue'] else: if not existing_cache: record_revision, record, pending_changes, \ deactivated_hp_changes, undo_list, redo_list = \ create_cache_file(recid, uid) else: try: record_revision, record, pending_changes, \ deactivated_hp_changes, undo_list, redo_list = \ get_cache_file_contents(recid, uid)[1:] except: record_revision, record, pending_changes, \ deactivated_hp_changes = create_cache_file(recid, uid) record_add_field(record, '980', ' ', ' ', '', [('c', 'DELETED')]) undo_list = [] redo_list = [] update_cache_file_contents(recid, uid, record_revision, record, \ pending_changes, \ deactivated_hp_changes, undo_list, \ redo_list) save_xml_record(recid, uid) delete_related_holdingpen_changes(recid) # a deleted record needs no # pending Holding Pen changes response['resultCode'] = 10 elif request_type == 'deleteRecordCache': # Delete the cache file. Ignore the request if the cache has been # modified in another editor. if cache_exists(recid, uid) and get_cache_mtime(recid, uid) == \ data['cacheMTime']: delete_cache_file(recid, uid) response['resultCode'] = 11 elif request_type == 'prepareRecordMerge': # We want to merge the cache with the current DB version of the record, # so prepare an XML file from the file cache, to be used by BibMerge. # Possible error situations: # - Missing cache file # - Record locked by other user # - Record locked by queue # We don't check if the cache is outdated (a likely scenario for this # request) or if it has been modified in another editor. if not cache_exists(recid, uid): response['resultCode'] = 106 elif cache_expired(recid, uid) and \ record_locked_by_other_user(recid, uid): response['resultCode'] = 104 elif record_locked_by_queue(recid): response['resultCode'] = 105 else: save_xml_record(recid, uid, to_upload=False, to_merge=True) response['resultCode'] = 12 return response def perform_request_update_record(request_type, recid, uid, cacheMTime, data, \ hpChanges, undoRedoOp, isBulk=False, \ ln=CFG_SITE_LANG): """Handle record update requests like adding, modifying, moving or deleting fields or subfields. Possible common error situations: - Missing cache file - Cache file modified in other editor Explanation of some parameters: undoRedoOp - indicates whether the current request performs an "undo" or a "redo", or carries a new undo descriptor. """ response = {} if not cache_exists(recid, uid): response['resultCode'] = 106 elif not get_cache_mtime(recid, uid) == cacheMTime and isBulk == False: # In case of a bulk request, the changes are deliberately performed # immediately one after another response['resultCode'] = 107 else: try: record_revision, record, pending_changes, deactivated_hp_changes, \ undo_list, redo_list = get_cache_file_contents(recid, uid)[1:] except: response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'error_wrong_cache_file_format'] return response # Process all the Holding Pen change operations, regardless of the # request type.
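# [Editorial sketch, not part of the patch] The block below toggles the
# 'applied_change' flag on pending Holding Pen changes according to the
# id lists sent by the client. The same logic in isolation:
#
#   def apply_hp_toggles(pending_changes, hp_changes):
#       for change_id in hp_changes.get('toDisable', []):
#           pending_changes[change_id]['applied_change'] = True
#       for change_id in hp_changes.get('toEnable', []):
#           pending_changes[change_id]['applied_change'] = False
#       return hp_changes.get('toOverride', pending_changes)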
if hpChanges.has_key("toDisable"): for changeId in hpChanges["toDisable"]: pending_changes[changeId]["applied_change"] = True if hpChanges.has_key("toEnable"): for changeId in hpChanges["toEnable"]: pending_changes[changeId]["applied_change"] = False if hpChanges.has_key("toOverride"): pending_changes = hpChanges["toOverride"] if hpChanges.has_key("changesetsToDeactivate"): for changesetId in hpChanges["changesetsToDeactivate"]: deactivated_hp_changes[changesetId] = True if hpChanges.has_key("changesetsToActivate"): for changesetId in hpChanges["changesetsToActivate"]: deactivated_hp_changes[changesetId] = False # processing the undo/redo entries if undoRedoOp == "undo": try: redo_list = [undo_list[-1]] + redo_list undo_list = undo_list[:-1] except: raise Exception("An exception occurred when undoing the previous" + \ " operation. Undo list: " + str(undo_list) + \ " Redo list " + str(redo_list)) elif undoRedoOp == "redo": try: undo_list = undo_list + [redo_list[0]] redo_list = redo_list[1:] except: raise Exception("An exception occurred when redoing the previous" + \ " operation. Undo list: " + str(undo_list) + \ " Redo list " + str(redo_list)) else: # This is a genuine operation - we have to add a new descriptor # to the undo list and clear the redo list, unless the operation # is a bulk operation if undoRedoOp != None: undo_list = undo_list + [undoRedoOp] redo_list = [] else: assert isBulk == True field_position_local = data.get('fieldPosition') if field_position_local is not None: field_position_local = int(field_position_local) if request_type == 'otherUpdateRequest': # An empty request. Might be useful if we want to perform # operations that require only the actions performed globally, # like modifying the holdingPen changes list response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'editor_modifications_changed'] elif request_type == 'deactivateHoldingPenChangeset': # The changeset has been marked as processed (the user applied it in # the editor). Mark it as used in the cache file. # CAUTION: This is implemented here because logically it fits with # the modifications made to the cache file. No changes are made to # the Holding Pen physically. The changesets are tied to the cache # because we want the deactivation to be cancelled whenever the # cache disappears for any reason. response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV[ \ 'disabled_hp_changeset']
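# [Editorial sketch, not part of the patch] The undo/redo bookkeeping
# above is a pair of stacks: "undo" pops the newest undo entry onto the
# redo stack, "redo" does the reverse, and any genuine operation pushes
# its descriptor and clears the redo stack. As a pure function:
#
#   def rotate_undo_redo(undo_list, redo_list, op):
#       if op == 'undo':
#           return undo_list[:-1], [undo_list[-1]] + redo_list
#       if op == 'redo':
#           return undo_list + [redo_list[0]], redo_list[1:]
#       return undo_list + [op], []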
elif request_type == 'addField': if data['controlfield']: record_add_field(record, data['tag'], controlfield_value=data['value']) response['resultCode'] = 20 else: record_add_field(record, data['tag'], data['ind1'], data['ind2'], subfields=data['subfields'], field_position_local=field_position_local) response['resultCode'] = 21 elif request_type == 'addSubfields': subfields = data['subfields'] for subfield in subfields: record_add_subfield_into(record, data['tag'], subfield[0], subfield[1], subfield_position=None, field_position_local=field_position_local) if len(subfields) == 1: response['resultCode'] = 22 else: response['resultCode'] = 23 elif request_type == 'addFieldsSubfieldsOnPositions': # 1) Sort the fields by their identifiers fieldsToAdd = data['fieldsToAdd'] subfieldsToAdd = data['subfieldsToAdd'] for tag in fieldsToAdd.keys(): positions = fieldsToAdd[tag].keys() positions.sort() for position in positions: # now adding fields at a position isControlfield = (len(fieldsToAdd[tag][position][0]) == 0) # if there are no subfields, this is a controlfield if isControlfield: controlfieldValue = fieldsToAdd[tag][position][3] record_add_field(record, tag, field_position_local = \ int(position), \ controlfield_value = \ controlfieldValue) else: subfields = fieldsToAdd[tag][position][0] ind1 = fieldsToAdd[tag][position][1] ind2 = fieldsToAdd[tag][position][2] record_add_field(record, tag, ind1, ind2, subfields = \ subfields, field_position_local = \ int(position)) # 2) Add the subfields; their relative order is not important subfieldsPositions = subfieldsToAdd[tag][fieldPosition]. \ keys() subfieldsPositions.sort() for subfieldPosition in subfieldsPositions: subfield = subfieldsToAdd[tag][fieldPosition]\ [subfieldPosition] record_add_subfield_into(record, tag, subfield[0], \ subfield[1], \ subfield_position = \ int(subfieldPosition), \ field_position_local = \ int(fieldPosition)) response['resultCode'] = \ CFG_BIBEDIT_AJAX_RESULT_CODES_REV['added_positioned_subfields']
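# [Editorial sketch, not part of the patch] Position handling above, and
# in 'deleteFields' below, follows one rule: insert at increasing
# positions and delete at decreasing ones, so indices that are still to
# be processed never shift. Illustrated on a plain list:
#
#   def insert_sorted(items, additions):      # additions: {pos: value}
#       for pos in sorted(additions):
#           items.insert(pos, additions[pos])
#       return items
#
#   def delete_sorted(items, positions):
#       for pos in sorted(positions, reverse=True):
#           del items[pos]
#       return items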
elif request_type == 'modifyField': # Change the field structure: first remove the subfields, then add # the new ones and adjust the indices subfields = data['subFields'] # parse the JSON representation of # the subfields here new_field = create_field(subfields, data['ind1'], data['ind2']) record_replace_field(record, data['tag'], new_field, \ field_position_local = data['fieldPosition']) response['resultCode'] = 26 elif request_type == 'modifyContent': if data['subfieldIndex'] != None: record_modify_subfield(record, data['tag'], data['subfieldCode'], data['value'], int(data['subfieldIndex']), field_position_local=field_position_local) else: record_modify_controlfield(record, data['tag'], data["value"], field_position_local=field_position_local) response['resultCode'] = 24 elif request_type == 'modifySubfieldTag': record_add_subfield_into(record, data['tag'], data['subfieldCode'], data["value"], subfield_position= int(data['subfieldIndex']), field_position_local=field_position_local) record_delete_subfield_from(record, data['tag'], int(data['subfieldIndex']) + 1, field_position_local=field_position_local) response['resultCode'] = 24 elif request_type == 'modifyFieldTag': subfields = record_get_subfields(record, data['oldTag'], field_position_local=field_position_local) record_add_field(record, data['newTag'], data['ind1'], data['ind2'] , subfields=subfields) record_delete_field(record, data['oldTag'], ind1=data['oldInd1'], \ ind2=data['oldInd2'], field_position_local=field_position_local) response['resultCode'] = 32 elif request_type == 'moveSubfield': record_move_subfield(record, data['tag'], int(data['subfieldIndex']), int(data['newSubfieldIndex']), field_position_local=field_position_local) response['resultCode'] = 25 elif request_type == 'moveField': if data['direction'] == 'up': final_position_local = field_position_local-1 else: # direction is 'down' final_position_local = field_position_local+1 record_move_fields(record, data['tag'], [field_position_local], final_position_local) response['resultCode'] = 32 elif request_type == 'deleteFields': to_delete = data['toDelete'] deleted_fields = 0 deleted_subfields = 0 for tag in to_delete: # Sort the fields in decreasing order by their local position! fieldsOrder = to_delete[tag].keys() fieldsOrder.sort(lambda a, b: int(b) - int(a)) for field_position_local in fieldsOrder: if not to_delete[tag][field_position_local]: # No subfields specified - delete entire field. record_delete_field(record, tag, field_position_local=int(field_position_local)) deleted_fields += 1 else: for subfield_position in \ to_delete[tag][field_position_local][::-1]: # Delete subfields in reverse order (to keep the # indexing correct). record_delete_subfield_from(record, tag, int(subfield_position), field_position_local=int(field_position_local)) deleted_subfields += 1 if deleted_fields == 1 and deleted_subfields == 0: response['resultCode'] = 26 elif deleted_fields and deleted_subfields == 0: response['resultCode'] = 27 elif deleted_subfields == 1 and deleted_fields == 0: response['resultCode'] = 28 elif deleted_subfields and deleted_fields == 0: response['resultCode'] = 29 else: response['resultCode'] = 30 response['cacheMTime'], response['cacheDirty'] = \ update_cache_file_contents(recid, uid, record_revision, record, \ pending_changes, \ deactivated_hp_changes, \ undo_list, redo_list), \ True return response
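# [Editorial sketch, not part of the patch] The autocomplete handlers
# that follow filter knowledge-base suggestions down to those starting
# with the typed text and not already present in the record. The two
# filters as standalone helpers (names are illustrative; the list
# comprehensions avoid mutating a list while iterating over it):

def _filter_by_prefix(suggestions, prefix):
    """Keep only suggestions beginning with the typed text."""
    return [s for s in suggestions if s.startswith(prefix)]

def _drop_existing(values, existing_values):
    """Keep only values not already stored in the record."""
    return [v for v in values if v not in existing_values]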
def perform_request_autocomplete(request_type, recid, uid, data): """ Perform an AJAX request associated with the retrieval of autocomplete data. Arguments: request_type: Type of the currently served request recid: the identifier of the record uid: The identifier of the user being currently logged in data: The request data containing possibly important additional arguments """ response = {} # get the values based on which one needs to search searchby = data['value'] # check that the data is properly defined fulltag = '' if data.has_key('maintag') and data.has_key('subtag1') and \ data.has_key('subtag2') and data.has_key('subfieldcode'): maintag = data['maintag'] subtag1 = data['subtag1'] subtag2 = data['subtag2'] u_subtag1 = subtag1 u_subtag2 = subtag2 if (not subtag1) or (subtag1 == ' '): u_subtag1 = '_' if (not subtag2) or (subtag2 == ' '): u_subtag2 = '_' subfieldcode = data['subfieldcode'] fulltag = maintag+u_subtag1+u_subtag2+subfieldcode if request_type == 'autokeyword': # call the keyword-from-ontology function if fulltag and searchby: items = get_kbt_items_for_bibedit(CFG_BIBEDIT_KEYWORD_TAXONOMY, \ CFG_BIBEDIT_KEYWORD_RDFLABEL, \ searchby) response['autokeyword'] = items if request_type == 'autosuggest': # call the knowledge base function to put the suggestions in an array if fulltag and searchby and len(searchby) > 3: suggest_values = get_kbd_values_for_bibedit(fulltag, "", searchby) # keep only the suggestions that start with the typed text new_suggest_vals = [] for sugg in suggest_values: if sugg.startswith(searchby): new_suggest_vals.append(sugg) response['autosuggest'] = new_suggest_vals if request_type == 'autocomplete': # call the values function with the correct kb_name if CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS.has_key(fulltag): kbname = CFG_BIBEDIT_AUTOCOMPLETE_TAGS_KBS[fulltag] # check whether the searchby field has semicolons; if so, take # all the semicolon-separated items items = [] vals = [] if searchby: if ';' in searchby: items = searchby.split(';') else: items = [searchby.strip()] for item in items: item = item.strip() kbrvals = get_kbr_values(kbname, item, '', 'e') # we want an exact match if kbrvals and kbrvals[0]: # add the found val into vals vals.append(kbrvals[0]) # check that the values are not already contained in other # instances of this field record = get_cache_file_contents(recid, uid)[2] xml_rec = print_rec(record) record, status_code, dummy_errors = create_record(xml_rec) existing_values = [] if status_code != 0: existing_values = record_get_field_values(record, maintag, subtag1, subtag2, subfieldcode) # keep the new values, i.e. vals not already in the record # (do not remove from a list while iterating over it) new_vals = [val for val in vals if val not in existing_values] response['autocomplete'] = new_vals response['resultCode'] = CFG_BIBEDIT_AJAX_RESULT_CODES_REV['autosuggestion_scanned'] return response def perform_request_bibcatalog(request_type, recid, uid): """Handle a request to BibCatalog (RT).""" response = {} if request_type == 'getTickets': # Insert the ticket data in the response, if possible if uid: bibcat_resp = bibcatalog_system.check_system(uid) if bibcat_resp == "": tickets_found = bibcatalog_system.ticket_search(uid, \ status=['new', 'open'], recordid=recid) t_url_str = '' # put ticket URLs here, formatted for HTML display for t_id in tickets_found: ticket_info = bibcatalog_system.ticket_get_info( \ uid, t_id, ['url_display', 'url_close']) t_url = ticket_info['url_display'] t_close_url = ticket_info['url_close'] # format.. t_url_str += "#" + str(t_id) + '
[read] [close]
' #put ticket header and tickets links in the box t_url_str = "Tickets
" + t_url_str + \ "
" + '" response['resultCode'] = 31 return response def perform_request_preview_record(request_type, recid, uid): """ Handle request to preview record with formatting """ response = {} if request_type == "preview": if cache_exists(recid, uid): dummy1, dummy2, record, dummy3, dummy4, dummy5, dummy6 = get_cache_file_contents(recid, uid) else: record = get_bibrecord(recid) response['html_preview'] = _get_formated_record(record) return response def _get_formated_record(record): """Returns a record in a given format @param record: BibRecord object """ xml_record = bibrecord.record_xml_output(record) result = "Record preview" - result += "" + result += get_mathjax_header() result += "

Brief format preview

" result += bibformat.format_record(recID=None, of="hb", xml_record=xml_record) result += "

Detailed format preview

" result += bibformat.format_record(recID=None, of="hd", xml_record=xml_record) result += "" return result diff --git a/modules/miscutil/lib/htmlutils.py b/modules/miscutil/lib/htmlutils.py index 3557dd0bf..2a839b834 100644 --- a/modules/miscutil/lib/htmlutils.py +++ b/modules/miscutil/lib/htmlutils.py @@ -1,525 +1,549 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """HTML utilities.""" __revision__ = "$Id$" from HTMLParser import HTMLParser -from invenio.config import CFG_SITE_URL +from invenio.config import CFG_SITE_URL, CFG_MATHJAX_HOSTING from invenio.textutils import indent_text import re import cgi try: from invenio.fckeditor import fckeditor fckeditor_available = True except ImportError, e: fckeditor_available = False # List of allowed tags (tags that won't create any XSS risk) cfg_html_buffer_allowed_tag_whitelist = ('a', 'p', 'br', 'blockquote', 'strong', 'b', 'u', 'i', 'em', 'ul', 'ol', 'li', 'sub', 'sup', 'div', 'strike') # List of allowed attributes. Be cautious, some attributes may be risky: #

cfg_html_buffer_allowed_attribute_whitelist = ('href', 'name', 'class') ## precompile some often-used regexp for speed reasons: re_html = re.compile("(?s)<[^>]*>|&#?\w+;") def nmtoken_from_string(text): """ Returns a Nmtoken from a string. It is useful to produce XHTML valid values for the 'name' attribute of an anchor. CAUTION: the function is surjective: 2 different texts might lead to the same result. This is improbable on a single page. Nmtoken is the type that is a mixture of characters supported in attributes such as 'name' in HTML 'a' tag. For example, should be tranformed to using this function. http://www.w3.org/TR/2000/REC-xml-20001006#NT-Nmtoken Also note that this function filters more characters than specified by the definition of Nmtoken ('CombiningChar' and 'Extender' charsets are filtered out). """ text = text.replace('-', '--') return ''.join( [( ((not char.isalnum() and not char in ['.', '-', '_', ':']) and str(ord(char))) or char) for char in text] ) def escape_html(text, escape_quotes=False): """Escape all HTML tags, avoiding XSS attacks. < => < > => > & => &: @param text: text to be escaped from HTML tags @param escape_quotes: if True, escape any quote mark to its HTML entity: " => " ' => " """ text = text.replace('&', '&') text = text.replace('<', '<') text = text.replace('>', '>') if escape_quotes: text = text.replace('"', '"') text = text.replace("'", '"') return text class HTMLWasher(HTMLParser): """ Creates a washer for HTML, avoiding XSS attacks. See wash function for details on parameters. Usage:: from invenio.htmlutils import HTMLWasher washer = HTMLWasher() escaped_text = washer.wash(unescaped_text) Examples:: a.wash('Spam and eggs') => 'Spam and eggs' a.wash('Spam and eggs', True) => 'Spam and <blink>eggs</blink>' a.wash('Spam and eggs') => 'Spam and eggs' a.wash('Spam and eggs') =>'Spam and eggs' a.wash('Spam and poilu') =>'Spam and eggs' """ silent = False def __init__(self): """ Constructor; initializes washer """ HTMLParser.__init__(self) self.result = '' self.render_unallowed_tags = False self.allowed_tag_whitelist = \ cfg_html_buffer_allowed_tag_whitelist self.allowed_attribute_whitelist = \ cfg_html_buffer_allowed_attribute_whitelist # javascript: self.re_js = re.compile( ".*(j|j|J)"\ "\s*(a|a|A)"\ "\s*(v|v|V)"\ "\s*(a|a|A)"\ "\s*(s|s|S)"\ "\s*(c|c|C)"\ "\s*(r|r|R)"\ "\s*(i|Ã|I)"\ "\s*(p|p|P)"\ "\s*(t|p|T)"\ "\s*(:|:).*", re.IGNORECASE | re.DOTALL) # vbscript: self.re_vb = re.compile( ".*(v|v|V)"\ "\s*(b|b|B)"\ "\s*(s|s|S)"\ "\s*(c|c|C)"\ "\s*(r|r|R)"\ "\s*(i|Ã|I)"\ "\s*(p|p|P)"\ "\s*(t|p|T)"\ "\s*(:|:).*", re.IGNORECASE | re.DOTALL) def wash(self, html_buffer, render_unallowed_tags=False, allowed_tag_whitelist=cfg_html_buffer_allowed_tag_whitelist, allowed_attribute_whitelist=\ cfg_html_buffer_allowed_attribute_whitelist): """ Wash HTML buffer, escaping XSS attacks. @param html_buffer: text to escape @param render_unallowed_tags: if True, print unallowed tags escaping < and >. Else, only print content of unallowed tags. 
@param allowed_tag_whitelist: list of allowed tags @param allowed_attribute_whitelist: list of allowed attributes """ self.reset() self.result = '' self.render_unallowed_tags = render_unallowed_tags self.allowed_tag_whitelist = allowed_tag_whitelist self.allowed_attribute_whitelist = allowed_attribute_whitelist self.feed(html_buffer) self.close() return self.result def handle_starttag(self, tag, attrs): """Function called for new opening tags""" if tag.lower() in self.allowed_tag_whitelist: self.result += '<' + tag for (attr, value) in attrs: if attr.lower() in self.allowed_attribute_whitelist: self.result += ' %s="%s"' % \ (attr, self.handle_attribute_value(value)) self.result += '>' else: if self.render_unallowed_tags: self.result += '<' + cgi.escape(tag) for (attr, value) in attrs: self.result += ' %s="%s"' % \ (attr, cgi.escape(value, True)) self.result += '>' elif tag == 'style' or tag == 'script': # In that case we want to remove content too self.silent = True def handle_data(self, data): """Function called for text nodes""" if not self.silent: self.result += cgi.escape(data, True) def handle_endtag(self, tag): """Function called for ending of tags""" if tag.lower() in self.allowed_tag_whitelist: self.result += '' else: if self.render_unallowed_tags: self.result += '</' + cgi.escape(tag) + '>' if tag == 'style' or tag == 'script': self.silent = False def handle_startendtag(self, tag, attrs): """Function called for empty tags (e.g.
)""" if tag.lower() in self.allowed_tag_whitelist: self.result += '<' + tag for (attr, value) in attrs: if attr.lower() in self.allowed_attribute_whitelist: self.result += ' %s="%s"' % \ (attr, self.handle_attribute_value(value)) self.result += ' />' else: if self.render_unallowed_tags: self.result += '<' + cgi.escape(tag) for (attr, value) in attrs: self.result += ' %s="%s"' % \ (attr, cgi.escape(value, True)) self.result += ' />' def handle_attribute_value(self, value): """Check attribute. Especially designed for avoiding URLs in the form: javascript:myXSSFunction();""" if self.re_js.match(value) or self.re_vb.match(value): return '' return value def handle_charref(self, name): """Process character references of the form "&#ref;". Return it as it is.""" self.result += '&#' + name + ';' def handle_entityref(self, name): """Process a general entity reference of the form "&name;". Return it as it is.""" self.result += '&' + name + ';' +def get_mathjax_header(): + """ + Return the snippet of HTML code to put in HTML HEAD tag, in order to + enable MathJax support. + @note: with new releases of MathJax, update this function toghether with + $MJV variable in the root Makefile.am + """ + if CFG_MATHJAX_HOSTING.lower() == 'cdn': + mathjax_path = "http://cdn.mathjax.org/mathjax/1.1-latest" + else: + mathjax_path = "/MathJax" + return """ +""" % { + 'mathjax_path': mathjax_path +} + + def get_html_text_editor(name, id=None, content='', textual_content=None, width='300px', height='200px', enabled=True, file_upload_url=None, toolbar_set="Basic", custom_configurations_path='/fckeditor/invenio-fckeditor-config.js'): """ Returns a wysiwyg editor (FCKeditor) to embed in html pages. Fall back to a simple textarea when the library is not installed, or when the user's browser is not compatible with the editor, or when 'enable' is False, or when javascript is not enabled. NOTE that the output also contains a hidden field named 'editor_type' that contains the kind of editor used, 'textarea' or 'fckeditor'. Based on 'editor_type' you might want to take different actions, like replace CRLF with
when editor_type equals to 'textarea', but not when editor_type equals to 'fckeditor'. @param name: *str* the name attribute of the returned editor @param id: *str* the id attribute of the returned editor (when applicable) @param content: *str* the default content of the editor. @param textual_content: *str* a content formatted for the case where the wysiwyg editor is not available for user. When not specified, use value of 'content' @param width: *str* width of the editor in an html compatible unit: Eg: '400px', '50%'. @param height: *str* height of the editor in an html compatible unit: Eg: '400px', '50%'. @param enabled: *bool* if the wysiwyg editor is return (True) or if a simple texteara is returned (False) @param file_upload_url: *str* the URL used to upload new files via the editor upload panel. You have to implement the handler for your own use. The URL handler will get form variables 'File' as POST for the uploaded file, and 'Type' as GET for the type of file ('file', 'image', 'flash', 'media') When value is not given, the file upload is disabled. @param toolbar_set: *str* the name of the toolbar layout to use. FCKeditor comes by default with 'Basic' and 'Default'. To define other sets, customize the config file in /opt/invenio/var/www/fckeditor/invenio-fckconfig.js @param custom_configurations_path: *str* value for the FCKeditor config variable 'CustomConfigurationsPath', which allows to specify the path of a file that contains a custom configuration for the editor. The path is relative to /opt/invenio/var/www/ @return: the HTML markup of the editor """ if textual_content is None: textual_content = content editor = '' if enabled and fckeditor_available: # Prepare upload path settings if file_upload_url is not None: file_upload_script = ''' oFCKeditor.Config["LinkUploadURL"] = '%(file_upload_url)s'; oFCKeditor.Config["ImageUploadURL"] = '%(file_upload_url)s?type=Image'; oFCKeditor.Config["FlashUploadURL"] = '%(file_upload_url)s?type=Flash'; oFCKeditor.Config["MediaUploadURL"] = '%(file_upload_url)s?type=Media'; oFCKeditor.Config["LinkUpload"] = true; oFCKeditor.Config["ImageUpload"] = true; oFCKeditor.Config["FlashUpload"] = true; ''' % {'file_upload_url': file_upload_url} else: file_upload_script = ''' oFCKeditor.Config["LinkUpload"] = false; oFCKeditor.Config["ImageUpload"] = false; oFCKeditor.Config["FlashUpload"] = false; ''' # Prepare code to instantiate an editor editor += ''' ''' % \ {'textual_content': cgi.escape(textual_content), 'html_content': content, 'width': width, 'height': height, 'name': name, 'id': id or name, 'custom_configurations_path': custom_configurations_path, 'toolbar': toolbar_set, 'file_upload_script': file_upload_script, 'CFG_SITE_URL': CFG_SITE_URL} else: # FCKedior is not installed textarea = '' \ % {'content': cgi.escape(textual_content), 'width': width, 'height': height, 'name': name, 'id': id and ('id="%s"' % id) or ''} editor += textarea editor += '' return editor def remove_html_markup(text, replacechar=' '): """ Remove HTML markup from text. @param text: Input text. @type text: string. @param replacechar: By which character should we replace HTML markup. Usually, a single space or an empty string are nice values. @type replacechar: string @return: Input text with HTML markup removed. @rtype: string """ return re_html.sub(replacechar, text) def create_html_tag(tag, body=None, escape_body=False, escape_attr=True, indent=0, attrs=None, **other_attrs): """ Create an HTML tag. 
This function create a full HTML tag, putting toghether an optional inner body and a dictionary of attributes. >>> print create_html_tag ("select", create_html_tag("h1", ... "hello", other_attrs={'class': "foo"})) @param tag: the tag (e.g. "select", "body", "h1"...). @type tag: string @param body: some text/HTML to put in the body of the tag (this body will be indented WRT the tag). @type body: string @param escape_body: wether the body (if any) must be escaped. @type escape_body: boolean @param escape_attr: wether the attribute values (if any) must be escaped. @type escape_attr: boolean @param indent: number of level of indentation for the tag. @type indent: integer @param attrs: map of attributes to add to the tag. @type attrs: dict @return: the HTML tag. @rtype: string """ if attrs is None: attrs = {} attrs.update(other_attrs) out = "<%s" % tag for key, value in attrs.iteritems(): if escape_attr: value = escape_html(value, escape_quotes=True) out += ' %s="%s"' % (key, value) if body: out += ">\n" if escape_body: body = escape_html(body) out += indent_text(body, 1) out += "" % tag else: out += " />" out = indent_text(out, indent) out = out[:-1] # Let's remove trailing new line return out def create_html_select(options, selected=None, attrs=None, **other_attrs): """ Create an HTML select box. >>> print create_html_select(["foo", "bar"], selected="bar", name="baz") >>> print create_html_select({"foo": "oof", "bar": "rab"}, selected="bar", name="baz") @param options: this can either be a sequence of strings or a map of C{key->value}. In the former case, the C{select} tag will contain a list of C{option} tags (in alphabetical order), where the C{value} attribute is not specified. In the latter case, the C{value} attribute will be set to the C{key}, while the body of the C{option} will be set to C{value}. @type options: sequence or map @param selected: optional key/value to select by default. In case a map has been used for options, C{selected} must be set to an existing C{key}, otherwise it must be set to an existing C{value}. @type selected: string @param attrs: optional attributes to create the select tag. @type attrs: dict @param other_attrs: other optional attributes. @return: the HTML output. @rtype: string @note: the values and keys will be escaped for HTML. """ body = [] try: items = options.items() items.sort(lambda item1, item2: cmp(item1[1], item2[1])) for key, value in items: option_attrs = key == selected and {"selected": "selected"} or {} body.append(create_html_tag("option", body=value, escape_body=True, value=key, attrs=option_attrs)) except AttributeError: options.sort() for value in options: option_attrs = value == selected and {"selected": "selected"} or {} body.append(create_html_tag("option", body=value, escape_body=True, attrs=option_attrs)) return create_html_tag("select", body='\n'.join(body), attrs=attrs, **other_attrs) class _LinkGetter(HTMLParser): """ Hidden class that, by deriving from HTMLParser, will intercept all tags and retrieve the corresponding href attribute. All URLs are available in the urls attribute of the class. """ def __init__(self): HTMLParser.__init__(self) self.urls = set() def handle_starttag(self, tag, attrs): if tag == 'a': for (name, value) in attrs: if name == 'href': self.urls.add(value) def get_links_in_html_page(html): """ @param html: the HTML text to parse @type html: str @return: the list of URLs that were referenced via tags. 
@rtype: set of str """ parser = _LinkGetter() parser.feed(html) return parser.urls diff --git a/modules/webcomment/lib/webcomment_webinterface.py b/modules/webcomment/lib/webcomment_webinterface.py index 7e0741f4d..2134d7566 100644 --- a/modules/webcomment/lib/webcomment_webinterface.py +++ b/modules/webcomment/lib/webcomment_webinterface.py @@ -1,812 +1,813 @@ # -*- coding: utf-8 -*- ## Comments and reviews for records. ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Comments and reviews for records: web interface """ __lastupdated__ = """$Date$""" __revision__ = """$Id$""" import cgi from invenio.webcomment import check_recID_is_in_range, \ perform_request_display_comments_or_remarks, \ perform_request_add_comment_or_remark, \ perform_request_vote, \ perform_request_report, \ subscribe_user_to_discussion, \ unsubscribe_user_from_discussion, \ get_user_subscription_to_discussion, \ check_user_can_attach_file_to_comments, \ check_user_can_view_comments, \ check_user_can_send_comments, \ check_user_can_view_comment, \ query_get_comment from invenio.config import \ CFG_TMPDIR, \ CFG_SITE_LANG, \ CFG_SITE_URL, \ CFG_PREFIX, \ CFG_WEBCOMMENT_ALLOW_COMMENTS,\ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS from invenio.webuser import getUid, page_not_authorized, isGuestUser, collect_user_info from invenio.webpage import page, pageheaderonly, pagefooteronly from invenio.search_engine import create_navtrail_links, \ guess_primary_collection_of_a_record, \ get_colID from invenio.urlutils import redirect_to_url, \ make_canonical_urlargd +from invenio.htmlutils import get_mathjax_header from invenio.errorlib import register_exception from invenio.messages import gettext_set_language from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.websearchadminlib import get_detailed_page_tabs from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import \ mail_cookie_create_authorize_action, \ mail_cookie_create_common, \ mail_cookie_check_common, \ InvenioWebAccessMailCookieDeletedError, \ InvenioWebAccessMailCookieError from invenio.webcomment_config import \ CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE, \ CFG_WEBCOMMENT_MAX_ATTACHED_FILES import invenio.template webstyle_templates = invenio.template.load('webstyle') websearch_templates = invenio.template.load('websearch') try: from invenio.fckeditor_invenio_connector import FCKeditorConnectorInvenio fckeditor_available = True except ImportError, e: fckeditor_available = False import os from invenio import webinterface_handler_config as apache from invenio.bibdocfile import \ stream_file, \ decompose_file, \ propose_next_docname class WebInterfaceCommentsPages(WebInterfaceDirectory): """Defines the set of /comments pages.""" _exports = 
['', 'display', 'add', 'vote', 'report', 'index', 'attachments', 'subscribe', 'unsubscribe'] def __init__(self, recid=-1, reviews=0): self.recid = recid self.discussion = reviews # 0:comments, 1:reviews self.attachments = WebInterfaceCommentsFiles(recid, reviews) def index(self, req, form): """ Redirects to display function """ return self.display(req, form) def display(self, req, form): """ Display comments (reviews if enabled) associated with record having id recid where recid>0. This function can also be used to display remarks associated with basket having id recid where recid<-99. @param ln: language @param recid: record id, integer @param do: display order hh = highest helpful score, review only lh = lowest helpful score, review only hs = highest star score, review only ls = lowest star score, review only od = oldest date nd = newest date @param ds: display since all= no filtering by date nd = n days ago nw = n weeks ago nm = n months ago ny = n years ago where n is a single digit integer between 0 and 9 @param nb: number of results per page @param p: results page @param voted: boolean, active if user voted for a review, see vote function @param reported: int, active if user reported a certain comment/review, see report function @param reviews: boolean, enabled for reviews, disabled for comments @param subscribed: int, 1 if user just subscribed to discussion, -1 if unsubscribed @return the full html page. """ argd = wash_urlargd(form, {'do': (str, "od"), 'ds': (str, "all"), 'nb': (int, 100), 'p': (int, 1), 'voted': (int, -1), 'reported': (int, -1), 'subscribed': (int, 0), 'cmtgrp': (list, ["latest"]) # 'latest' is now a reserved group/round name }) _ = gettext_set_language(argd['ln']) uid = getUid(req) user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg) can_send_comments = False (auth_code, auth_msg) = check_user_can_send_comments(user_info, self.recid) if not auth_code: can_send_comments = True can_attach_files = False (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid) if not auth_code and (user_info['email'] != 'guest'): can_attach_files = True subscription = get_user_subscription_to_discussion(self.recid, uid) if subscription == 1: user_is_subscribed_to_discussion = True user_can_unsubscribe_from_discussion = True elif subscription == 2: user_is_subscribed_to_discussion = True user_can_unsubscribe_from_discussion = False else: user_is_subscribed_to_discussion = False user_can_unsubscribe_from_discussion = False #display_comment_rounds = [cmtgrp for cmtgrp in argd['cmtgrp'] if cmtgrp.isdigit() or cmtgrp == "all" or cmtgrp == "-1"] display_comment_rounds = argd['cmtgrp'] check_warnings = [] (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln']) if ok: (body, errors, warnings) = perform_request_display_comments_or_remarks(req=req, recID=self.recid, display_order=argd['do'], display_since=argd['ds'], nb_per_page=argd['nb'], page=argd['p'], ln=argd['ln'], voted=argd['voted'], reported=argd['reported'], 
subscribed=argd['subscribed'], reviews=self.discussion, uid=uid, can_send_comments=can_send_comments, can_attach_files=can_attach_files, user_is_subscribed_to_discussion=user_is_subscribed_to_discussion, user_can_unsubscribe_from_discussion=user_can_unsubscribe_from_discussion, display_comment_rounds=display_comment_rounds ) unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(self.recid)), self.recid, ln=argd['ln']) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x, y: cmp(x[1], y[1])) link_ln = '' if argd['ln'] != CFG_SITE_LANG: link_ln = '?ln=%s' % argd['ln'] tabs = [(unordered_tabs[tab_id]['label'], \ '%s/record/%s/%s%s' % (CFG_SITE_URL, self.recid, tab_id, link_ln), \ tab_id in ['comments', 'reviews'], unordered_tabs[tab_id]['enabled']) \ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] top = webstyle_templates.detailed_record_container_top(self.recid, tabs, argd['ln']) bottom = webstyle_templates.detailed_record_container_bottom(self.recid, tabs, argd['ln']) title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln']) navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid), ln=argd['ln']) if navtrail: navtrail += ' > ' navtrail += ''% (CFG_SITE_URL, self.recid, argd['ln']) navtrail += title navtrail += '' navtrail += ' > %s' % (self.discussion==1 and _("Reviews") or _("Comments")) mathjaxheader = '' if CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS: - mathjaxheader = """""" + mathjaxheader = get_mathjax_header() jqueryheader = ''' ''' % {'CFG_SITE_URL': CFG_SITE_URL} return pageheaderonly(title=title, navtrail=navtrail, uid=uid, verbose=1, metaheaderadd = mathjaxheader + jqueryheader, req=req, language=argd['ln'], navmenuid='search', navtrail_append_title_p=0) + \ websearch_templates.tmpl_search_pagestart(argd['ln']) + \ top + body + bottom + \ websearch_templates.tmpl_search_pageend(argd['ln']) + \ pagefooteronly(lastupdated=__lastupdated__, language=argd['ln'], req=req) else: return page(title=_("Record Not Found"), body=problem, uid=uid, verbose=1, req=req, language=argd['ln'], warnings=check_warnings, errors=[], navmenuid='search') # Return the same page wether we ask for /record/123 or /record/123/ __call__ = index def add(self, req, form): """ Add a comment (review) to record with id recid where recid>0 Also works for adding a remark to basket with id recid where recid<-99 @param ln: languange @param recid: record id @param action: 'DISPLAY' to display add form 'SUBMIT' to submit comment once form is filled 'REPLY' to reply to an already existing comment @param msg: the body of the comment/review or remark @param score: star score of the review @param note: title of the review @param comid: comment id, needed for replying @param editor_type: the type of editor used for submitting the comment: 'textarea', 'fckeditor'. @param subscribe: if set, subscribe user to receive email notifications when new comment are added to this discussion @return the full html page. """ argd = wash_urlargd(form, {'action': (str, "DISPLAY"), 'msg': (str, ""), 'note': (str, ''), 'score': (int, 0), 'comid': (int, 0), 'editor_type': (str, ""), 'subscribe': (str, ""), 'cookie': (str, "") }) _ = gettext_set_language(argd['ln']) actions = ['DISPLAY', 'REPLY', 'SUBMIT'] uid = getUid(req) # Is site ready to accept comments? 
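# [Editorial sketch, not part of the patch] Further below, before bouncing
# a guest user to the login page, the handler stashes the submitted form
# values in a one-time mail cookie so that the comment text survives the
# login round-trip. The pattern in isolation (create_cookie is a
# hypothetical stand-in for mail_cookie_create_common):
#
#   def stash_and_build_login_url(form_values, create_cookie, login_url):
#       cookie_id = create_cookie('comment_msg', form_values, onetime=True)
#       return login_url + '&cookie=' + cookie_id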
if uid == -1 or (not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS): return page_not_authorized(req, "../comments/add", navmenuid='search') # Is user allowed to post comment? user_info = collect_user_info(req) (auth_code_1, auth_msg_1) = check_user_can_view_comments(user_info, self.recid) (auth_code_2, auth_msg_2) = check_user_can_send_comments(user_info, self.recid) if isGuestUser(uid): cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) # Save user's value in cookie, so that these "POST" # parameters are not lost during login process msg_cookie = mail_cookie_create_common('comment_msg', {'msg': argd['msg'], 'note': argd['note'], 'score': argd['score'], 'editor_type': argd['editor_type'], 'subscribe': argd['subscribe']}, onetime=True) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri'] + '&cookie=' + msg_cookie}, {}) return redirect_to_url(req, target, norobot=True) elif (auth_code_1 or auth_code_2): return page_not_authorized(req, "../", \ text = auth_msg_1 + auth_msg_2) user_info = collect_user_info(req) can_attach_files = False (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid) if not auth_code and (user_info['email'] != 'guest'): can_attach_files = True warning_msgs = [] added_files = {} if can_attach_files: # User is allowed to attach files. Process the files file_too_big = False formfields = form.get('commentattachment[]', []) if not hasattr(formfields, "__getitem__"): # A single file was uploaded formfields = [formfields] for formfield in formfields[:CFG_WEBCOMMENT_MAX_ATTACHED_FILES]: if hasattr(formfield, "filename") and formfield.filename: filename = formfield.filename dir_to_open = os.path.join(CFG_TMPDIR, 'webcomment', str(uid)) try: assert(dir_to_open.startswith(CFG_TMPDIR)) except AssertionError: register_exception(req=req, prefix='User #%s tried to upload file to forbidden location: %s' \ % (uid, dir_to_open)) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except: register_exception(req=req, alert_admin=True) ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists(os.path.join(dir_to_open, filename)): basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension fp = open(os.path.join(dir_to_open, filename), "w") # FIXME: temporary, waiting for wsgi handler to be # fixed. Once done, read chunk by chunk ## while formfield.file: ## fp.write(formfield.file.read(10240)) fp.write(formfield.file.read()) fp.close() # Isn't this file too big? file_size = os.path.getsize(os.path.join(dir_to_open, filename)) if CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE > 0 and \ file_size > CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE: os.remove(os.path.join(dir_to_open, filename)) # One file is too big: record that, # dismiss all uploaded files and re-ask to # upload again file_too_big = True warning_msgs.append(('WRN_WEBCOMMENT_MAX_FILE_SIZE_REACHED', cgi.escape(filename), str(file_size/1024) + 'KB', str(CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE/1024) + 'KB')) else: added_files[filename] = os.path.join(dir_to_open, filename) if file_too_big: # One file was too big. 
Remove all uploaded files. for filepath in added_files.values(): try: os.remove(filepath) except: # File was already removed or does not exist? pass client_ip_address = req.remote_ip check_warnings = [] (ok, problem) = check_recID_is_in_range(self.recid, check_warnings, argd['ln']) if ok: title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, argd['ln']) navtrail = create_navtrail_links(cc=guess_primary_collection_of_a_record(self.recid)) if navtrail: navtrail += ' > ' navtrail += ''% (CFG_SITE_URL, self.recid, argd['ln']) navtrail += title navtrail += '' navtrail += '> %s' % (CFG_SITE_URL, self.recid, self.discussion==1 and 'reviews' or 'comments', argd['ln'], self.discussion==1 and _('Reviews') or _('Comments')) if argd['action'] not in actions: argd['action'] = 'DISPLAY' if not argd['msg']: # User had to login in-between, so retrieve msg # from cookie try: (kind, cookie_argd) = mail_cookie_check_common(argd['cookie'], delete=True) argd.update(cookie_argd) except InvenioWebAccessMailCookieDeletedError, e: return redirect_to_url(req, CFG_SITE_URL + '/record/' + \ str(self.recid) + (self.discussion==1 and \ '/reviews' or '/comments')) except InvenioWebAccessMailCookieError, e: # Invalid or empty cookie: continue pass subscribe = False if argd['subscribe'] and \ get_user_subscription_to_discussion(self.recid, uid) == 0: # User is not already subscribed, and asked to subscribe subscribe = True (body, errors, warnings) = perform_request_add_comment_or_remark(recID=self.recid, ln=argd['ln'], uid=uid, action=argd['action'], msg=argd['msg'], note=argd['note'], score=argd['score'], reviews=self.discussion, comID=argd['comid'], client_ip_address=client_ip_address, editor_type=argd['editor_type'], can_attach_files=can_attach_files, subscribe=subscribe, req=req, attached_files=added_files, warnings=warning_msgs) if self.discussion: title = _("Add Review") else: title = _("Add Comment") jqueryheader = ''' ''' % {'CFG_SITE_URL': CFG_SITE_URL} return page(title=title, body=body, navtrail=navtrail, uid=uid, language=CFG_SITE_LANG, verbose=1, errors=errors, warnings=warnings, req=req, navmenuid='search', metaheaderadd=jqueryheader) # id not in range else: return page(title=_("Record Not Found"), body=problem, uid=uid, verbose=1, req=req, warnings=check_warnings, errors=[], navmenuid='search') def vote(self, req, form): """ Vote positively or negatively for a comment/review.
@param comid: comment/review id @param com_value: +1 to vote positively -1 to vote negatively @param recid: the id of the record the comment/review is associated with @param ln: language @param do: display order hh = highest helpful score, review only lh = lowest helpful score, review only hs = highest star score, review only ls = lowest star score, review only od = oldest date nd = newest date @param ds: display since all= no filtering by date nd = n days ago nw = n weeks ago nm = n months ago ny = n years ago where n is a single digit integer between 0 and 9 @param nb: number of results per page @param p: results page @param referer: http address of the calling function to redirect to (refresh) @param reviews: boolean, enabled for reviews, disabled for comments """ argd = wash_urlargd(form, {'comid': (int, -1), 'com_value': (int, 0), 'recid': (int, -1), 'do': (str, "od"), 'ds': (str, "all"), 'nb': (int, 100), 'p': (int, 1), 'referer': (str, None) }) client_ip_address = req.remote_ip uid = getUid(req) user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg) success = perform_request_vote(argd['comid'], client_ip_address, argd['com_value'], uid) if argd['referer']: argd['referer'] += "?ln=%s&do=%s&ds=%s&nb=%s&p=%s&voted=%s&" % ( argd['ln'], argd['do'], argd['ds'], argd['nb'], argd['p'], success) redirect_to_url(req, argd['referer']) else: #Note: sent to comments display referer = "%s/record/%s/%s?&ln=%s&voted=1" referer %= (CFG_SITE_URL, self.recid, self.discussion == 1 and 'reviews' or 'comments', argd['ln']) redirect_to_url(req, referer) def report(self, req, form): """ Report a comment/review for inappropriate content @param comid: comment/review id @param recid: the id of the record the comment/review is associated with @param ln: language @param do: display order hh = highest helpful score, review only lh = lowest helpful score, review only hs = highest star score, review only ls = lowest star score, review only od = oldest date nd = newest date @param ds: display since all= no filtering by date nd = n days ago nw = n weeks ago nm = n months ago ny = n years ago where n is a single digit integer between 0 and 9 @param nb: number of results per page @param p: results page @param referer: http address of the calling function to redirect to (refresh) @param reviews: boolean, enabled for reviews, disabled for comments """ argd = wash_urlargd(form, {'comid': (int, -1), 'recid': (int, -1), 'do': (str, "od"), 'ds': (str, "all"), 'nb': (int, 100), 'p': (int, 1), 'referer': (str, None) }) client_ip_address = req.remote_ip uid = getUid(req) user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid) if auth_code or user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, 
norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg) success = perform_request_report(argd['comid'], client_ip_address, uid) if argd['referer']: argd['referer'] += "?ln=%s&do=%s&ds=%s&nb=%s&p=%s&reported=%s&" % (argd['ln'], argd['do'], argd['ds'], argd['nb'], argd['p'], str(success)) redirect_to_url(req, argd['referer']) else: #Note: sent to comments display referer = "%s/record/%s/%s/display?ln=%s&voted=1" referer %= (CFG_SITE_URL, self.recid, self.discussion==1 and 'reviews' or 'comments', argd['ln']) redirect_to_url(req, referer) def subscribe(self, req, form): """ Subscribe current user to receive email notification when new comments are added to current discussion. """ argd = wash_urlargd(form, {'referer': (str, None)}) uid = getUid(req) user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid) if isGuestUser(uid): cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg) success = subscribe_user_to_discussion(self.recid, uid) display_url = "%s/record/%s/comments/display?subscribed=%s&ln=%s" % \ (CFG_SITE_URL, self.recid, str(success), argd['ln']) redirect_to_url(req, display_url) def unsubscribe(self, req, form): """ Unsubscribe current user from current discussion. """ argd = wash_urlargd(form, {'referer': (str, None)}) user_info = collect_user_info(req) uid = getUid(req) if isGuestUser(uid): cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) success = unsubscribe_user_from_discussion(self.recid, uid) display_url = "%s/record/%s/comments/display?subscribed=%s&ln=%s" % \ (CFG_SITE_URL, self.recid, str(-success), argd['ln']) redirect_to_url(req, display_url) class WebInterfaceCommentsFiles(WebInterfaceDirectory): """Handle upload and access to files for comments. The upload is currently only available through the FCKeditor. """ #_exports = ['put'] # 'get' is handled by _lookup(..) def __init__(self, recid=-1, reviews=0): self.recid = recid self.discussion = reviews # 0:comments, 1:reviews def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for getting and putting attachments) Eg: CFG_SITE_URL/record/5953/comments/attachments/get/652/myfile.pdf """ if component == 'get' and len(path) > 1: comid = path[0] # comment ID file_name = '/'.join(path[1:]) # the filename def answer_get(req, form): """Accessing files attached to comments.""" form['file'] = file_name form['comid'] = comid return self._get(req, form) return answer_get, [] # All other cases: file not found return None, [] def _get(self, req, form): """ Returns a file attached to a comment. Example: CFG_SITE_URL/record/5953/comments/attachments/get/652/myfile.pdf where 652 is the comment ID """ argd = wash_urlargd(form, {'file': (str, None), 'comid': (int, 0)}) _ = gettext_set_language(argd['ln']) # Can user view this record, i.e. can user access its # attachments? 
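        # (Illustrative trace, not part of the original patch: for
        # CFG_SITE_URL/record/5953/comments/attachments/get/652/myfile.pdf,
        # _lookup() above is called with component='get' and
        # path=['652', 'myfile.pdf'], so the returned answer_get() closure
        # fills form['comid']='652' and form['file']='myfile.pdf' before
        # delegating to this method; they arrive here washed into
        # argd['comid'] and argd['file'].)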
uid = getUid(req) user_info = collect_user_info(req) # Check that user can view record, and its comments (protected # with action "viewcomment") (auth_code, auth_msg) = check_user_can_view_comments(user_info, self.recid) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg) # Does comment exist? if not query_get_comment(argd['comid']): req.status = apache.HTTP_NOT_FOUND return page(title=_("Page Not Found"), body=_('The requested comment could not be found'), req=req) # Check that user can view this particular comment, protected # using its own restriction (auth_code, auth_msg) = check_user_can_view_comment(user_info, argd['comid']) if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : \ CFG_SITE_URL + user_info['uri']}, {}) return redirect_to_url(req, target) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg, ln=argd['ln']) if not argd['file'] is None: # Prepare path to file on disk. Normalize the path so that # ../ and other dangerous components are removed. path = os.path.abspath(CFG_PREFIX + '/var/data/comments/' + \ str(self.recid) + '/' + str(argd['comid']) + \ '/' + argd['file']) # Check that we are really accessing attachements # directory, for the declared record. if path.startswith(CFG_PREFIX + '/var/data/comments/' + \ str(self.recid)) and \ os.path.exists(path): return stream_file(req, path) # Send error 404 in all other cases req.status = apache.HTTP_NOT_FOUND return page(title=_("Page Not Found"), body=_('The requested file could not be found'), req=req, language=argd['ln']) ## def put(self, req, form): ## """ ## Process requests received from FCKeditor to upload files, etc. ## """ ## if not fckeditor_available: ## return ## uid = getUid(req) ## # URL where the file can be fetched after upload ## user_files_path = '%(CFG_SITE_URL)s/record/%(recid)i/comments/attachments/get/%(uid)s' % \ ## {'uid': uid, ## 'recid': self.recid, ## 'CFG_SITE_URL': CFG_SITE_URL} ## # Path to directory where uploaded files are saved ## user_files_absolute_path = '%(CFG_PREFIX)s/var/data/comments/%(recid)s/%(uid)s' % \ ## {'uid': uid, ## 'recid': self.recid, ## 'CFG_PREFIX': CFG_PREFIX} ## # Create a Connector instance to handle the request ## conn = FCKeditorConnectorInvenio(form, recid=self.recid, uid=uid, ## allowed_commands=['QuickUpload'], ## allowed_types = ['File', 'Image', 'Flash', 'Media'], ## user_files_path = user_files_path, ## user_files_absolute_path = user_files_absolute_path) ## # Check that user can upload attachments for comments. 
##         user_info = collect_user_info(req)
##         (auth_code, auth_msg) = check_user_can_attach_file_to_comments(user_info, self.recid)
##         if user_info['email'] == 'guest':
##             # User is guest: must login prior to upload
##             data = conn.sendUploadResults(1, '', '', 'Please login before uploading file.')
##         elif auth_code:
##             # User cannot submit
##             data = conn.sendUploadResults(1, '', '', 'Sorry, you are not allowed to submit files.')
##         else:
##             # Process the upload and get the response
##             data = conn.doResponse()
##         # Transform the headers into something ok for mod_python
##         for header in conn.headers:
##             if not header is None:
##                 if header[0] == 'Content-Type':
##                     req.content_type = header[1]
##                 else:
##                     req.headers_out[header[0]] = header[1]
##         # Send our response
##         req.send_http_header()
##         req.write(data)

diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py
index 5f6795d94..d8e653706 100644
--- a/modules/websearch/lib/search_engine.py
+++ b/modules/websearch/lib/search_engine.py
@@ -1,5461 +1,5460 @@
# -*- coding: utf-8 -*-
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable=C0301 """Invenio Search Engine in mod_python.""" __lastupdated__ = """$Date$""" __revision__ = "$Id$" ## import general modules: import cgi import cStringIO import copy import string import os import re import time import urllib import urlparse import zlib import sys if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 ## import Invenio stuff: from invenio.config import \ CFG_CERN_SITE, \ CFG_INSPIRE_SITE, \ CFG_OAI_ID_FIELD, \ CFG_WEBCOMMENT_ALLOW_REVIEWS, \ CFG_WEBSEARCH_CALL_BIBFORMAT, \ CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX, \ CFG_WEBSEARCH_FIELDS_CONVERT, \ CFG_WEBSEARCH_NB_RECORDS_TO_SORT, \ CFG_WEBSEARCH_SEARCH_CACHE_SIZE, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_WEBSEARCH_WILDCARD_LIMIT, \ CFG_WEBSEARCH_SYNONYM_KBRS, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_LOGDIR, \ CFG_BIBFORMAT_HIDDEN_TAGS, \ CFG_SITE_URL, \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_BIBRANK_SHOW_CITATION_LINKS, \ CFG_SOLR_URL from invenio.search_engine_config import InvenioWebSearchUnknownCollectionError, InvenioWebSearchWildcardLimitError from invenio.bibrecord import create_record, record_get_field_instances from invenio.bibrank_record_sorter import get_bibrank_methods, rank_records, is_method_valid from invenio.bibrank_downloads_similarity import register_page_view_event, calculate_reading_similarity_list from invenio.bibindex_engine_stemmer import stem from invenio.bibindex_engine_tokenizer import wash_author_name, author_name_requires_phrase_search from invenio.bibformat import format_record, format_records, get_output_format_content_type, create_excel from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.bibrank_downloads_grapher import create_download_history_graph_and_box from invenio.bibknowledge import get_kbr_values from invenio.data_cacher import DataCacher from invenio.websearch_external_collections import print_external_results_overview, perform_external_collection_search from invenio.access_control_admin import acc_get_action_id from invenio.access_control_config import VIEWRESTRCOLL, \ CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS from invenio.websearchadminlib import get_detailed_page_tabs from invenio.intbitset import intbitset as HitSet from invenio.dbquery import DatabaseError, deserialize_via_marshal, InvenioDbQueryWildcardLimitError from invenio.access_control_engine import acc_authorize_action from invenio.errorlib import register_exception from invenio.textutils import encode_for_xml, wash_for_utf8 +from invenio.htmlutils import get_mathjax_header import invenio.template webstyle_templates = invenio.template.load('webstyle') webcomment_templates = invenio.template.load('webcomment') from invenio.bibrank_citation_searcher import get_cited_by_count, calculate_cited_by_list, \ calculate_co_cited_with_list, get_records_with_num_cites, get_self_cited_by, \ get_refersto_hitset, get_citedby_hitset from invenio.bibrank_citation_grapher import create_citation_history_graph_and_box from invenio.dbquery import run_sql, run_sql_with_limit, \ get_table_update_time, Error from invenio.webuser import getUid, collect_user_info from invenio.webpage import pageheaderonly, pagefooteronly, create_error_box from invenio.messages import gettext_set_language from 
invenio.search_engine_query_parser import SearchQueryParenthesisedParser, \ SpiresToInvenioSyntaxConverter from invenio import webinterface_handler_config as apache from invenio.solrutils import solr_get_bitset try: import invenio.template websearch_templates = invenio.template.load('websearch') except: pass from invenio.websearch_external_collections import calculate_hosted_collections_results, do_calculate_hosted_collections_results from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH from invenio.websearch_external_collections_config import CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_MAXRESULTS VIEWRESTRCOLL_ID = acc_get_action_id(VIEWRESTRCOLL) ## global vars: cfg_nb_browse_seen_records = 100 # limit of the number of records to check when browsing certain collection cfg_nicely_ordered_collection_list = 0 # do we propose collection list nicely ordered or alphabetical? ## precompile some often-used regexp for speed reasons: re_word = re.compile('[\s]') re_quotes = re.compile('[\'\"]') re_doublequote = re.compile('\"') re_equal = re.compile('\=') re_logical_and = re.compile('\sand\s', re.I) re_logical_or = re.compile('\sor\s', re.I) re_logical_not = re.compile('\snot\s', re.I) re_operators = re.compile(r'\s([\+\-\|])\s') re_pattern_wildcards_after_spaces = re.compile(r'(\s)[\*\%]+') re_pattern_single_quotes = re.compile("'(.*?)'") re_pattern_double_quotes = re.compile("\"(.*?)\"") re_pattern_regexp_quotes = re.compile("\/(.*?)\/") re_pattern_spaces_after_colon = re.compile(r'(:\s+)') re_pattern_short_words = re.compile(r'([\s\"]\w{1,3})[\*\%]+') re_pattern_space = re.compile("__SPACE__") re_pattern_today = re.compile("\$TODAY\$") re_pattern_parens = re.compile(r'\([^\)]+\s+[^\)]+\)') re_unicode_lowercase_a = re.compile(unicode(r"(?u)[áàäâãå]", "utf-8")) re_unicode_lowercase_ae = re.compile(unicode(r"(?u)[æ]", "utf-8")) re_unicode_lowercase_e = re.compile(unicode(r"(?u)[éèëê]", "utf-8")) re_unicode_lowercase_i = re.compile(unicode(r"(?u)[íìïî]", "utf-8")) re_unicode_lowercase_o = re.compile(unicode(r"(?u)[óòöôõø]", "utf-8")) re_unicode_lowercase_u = re.compile(unicode(r"(?u)[úùüû]", "utf-8")) re_unicode_lowercase_y = re.compile(unicode(r"(?u)[ýÿ]", "utf-8")) re_unicode_lowercase_c = re.compile(unicode(r"(?u)[çć]", "utf-8")) re_unicode_lowercase_n = re.compile(unicode(r"(?u)[ñ]", "utf-8")) re_unicode_uppercase_a = re.compile(unicode(r"(?u)[ÁÀÄÂÃÅ]", "utf-8")) re_unicode_uppercase_ae = re.compile(unicode(r"(?u)[Æ]", "utf-8")) re_unicode_uppercase_e = re.compile(unicode(r"(?u)[ÉÈËÊ]", "utf-8")) re_unicode_uppercase_i = re.compile(unicode(r"(?u)[ÍÌÏÎ]", "utf-8")) re_unicode_uppercase_o = re.compile(unicode(r"(?u)[ÓÒÖÔÕØ]", "utf-8")) re_unicode_uppercase_u = re.compile(unicode(r"(?u)[ÚÙÜÛ]", "utf-8")) re_unicode_uppercase_y = re.compile(unicode(r"(?u)[Ý]", "utf-8")) re_unicode_uppercase_c = re.compile(unicode(r"(?u)[ÇĆ]", "utf-8")) re_unicode_uppercase_n = re.compile(unicode(r"(?u)[Ñ]", "utf-8")) re_latex_lowercase_a = re.compile("\\\\[\"H'`~^vu=k]\{?a\}?") re_latex_lowercase_ae = re.compile("\\\\ae\\{\\}?") re_latex_lowercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?e\\}?") re_latex_lowercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?i\\}?") re_latex_lowercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?o\\}?") re_latex_lowercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?u\\}?") re_latex_lowercase_y = re.compile("\\\\[\"']\\{?y\\}?") re_latex_lowercase_c = re.compile("\\\\['uc]\\{?c\\}?") 
re_latex_lowercase_n = re.compile("\\\\[c'~^vu]\\{?n\\}?") re_latex_uppercase_a = re.compile("\\\\[\"H'`~^vu=k]\\{?A\\}?") re_latex_uppercase_ae = re.compile("\\\\AE\\{?\\}?") re_latex_uppercase_e = re.compile("\\\\[\"H'`~^vu=k]\\{?E\\}?") re_latex_uppercase_i = re.compile("\\\\[\"H'`~^vu=k]\\{?I\\}?") re_latex_uppercase_o = re.compile("\\\\[\"H'`~^vu=k]\\{?O\\}?") re_latex_uppercase_u = re.compile("\\\\[\"H'`~^vu=k]\\{?U\\}?") re_latex_uppercase_y = re.compile("\\\\[\"']\\{?Y\\}?") re_latex_uppercase_c = re.compile("\\\\['uc]\\{?C\\}?") re_latex_uppercase_n = re.compile("\\\\[c'~^vu]\\{?N\\}?") class RestrictedCollectionDataCacher(DataCacher): def __init__(self): def cache_filler(): ret = [] try: res = run_sql("""SELECT DISTINCT ar.value FROM accROLE_accACTION_accARGUMENT raa JOIN accARGUMENT ar ON raa.id_accARGUMENT = ar.id WHERE ar.keyword = 'collection' AND raa.id_accACTION = %s""", (VIEWRESTRCOLL_ID,)) except Exception: # database problems, return empty cache return [] for coll in res: ret.append(coll[0]) return ret def timestamp_verifier(): return max(get_table_update_time('accROLE_accACTION_accARGUMENT'), get_table_update_time('accARGUMENT')) DataCacher.__init__(self, cache_filler, timestamp_verifier) def collection_restricted_p(collection, recreate_cache_if_needed=True): if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() return collection in restricted_collection_cache.cache try: restricted_collection_cache.is_ok_p except Exception: restricted_collection_cache = RestrictedCollectionDataCacher() def ziplist(*lists): """Just like zip(), but returns lists of lists instead of lists of tuples Example: zip([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) => [(f1, p1, op1), (f2, p2, op2), (f3, p3, '')] ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, '']) => [[f1, p1, op1], [f2, p2, op2], [f3, p3, '']] FIXME: This is handy to have, and should live somewhere else, like miscutil.really_useful_functions or something. """ def l(*items): return list(items) return map(l, *lists) def get_permitted_restricted_collections(user_info, recreate_cache_if_needed=True): """Return a list of collection that are restricted but for which the user is authorized.""" if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() ret = [] for collection in restricted_collection_cache.cache: if acc_authorize_action(user_info, 'viewrestrcoll', collection=collection)[0] == 0: ret.append(collection) return ret def get_restricted_collections_for_recid(recid, recreate_cache_if_needed=True): """ Return the list of restricted collection names to which recid belongs. """ if recreate_cache_if_needed: restricted_collection_cache.recreate_cache_if_needed() collection_reclist_cache.recreate_cache_if_needed() return [collection for collection in restricted_collection_cache.cache if recid in get_collection_reclist(collection, recreate_cache_if_needed=False)] def is_user_owner_of_record(user_info, recid): """ Check if the user is owner of the record, i.e. he is the submitter and/or belongs to a owner-like group authorized to 'see' the record. @param user_info: the user_info dictionary that describe the user. @type user_info: user_info dictionary @param recid: the record identifier. 
@type recid: positive integer @return: True if the user is 'owner' of the record; False otherwise @rtype: bool """ authorized_emails_or_group = [] for tag in CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS: authorized_emails_or_group.extend(get_fieldvalues(recid, tag)) for email_or_group in authorized_emails_or_group: if email_or_group in user_info['group']: return True email = email_or_group.strip().lower() if user_info['email'].strip().lower() == email: return True return False def check_user_can_view_record(user_info, recid): """ Check if the user is authorized to view the given recid. The function grants access in two cases: either user has author rights on this record, or he has view rights to the primary collection this record belongs to. @param user_info: the user_info dictionary that describe the user. @type user_info: user_info dictionary @param recid: the record identifier. @type recid: positive integer @return: (0, ''), when authorization is granted, (>0, 'message') when authorization is not granted @rtype: (int, string) """ if record_public_p(recid): ## The record is already known to be public. return (0, '') ## At this point, either webcoll has not yet run or there are some ## restricted collections. Let's see first if the user own the record. if is_user_owner_of_record(user_info, recid): ## Perfect! It's authorized then! return (0, '') restricted_collections = get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False) if restricted_collections: ## If there are restricted collections the user must be authorized to all of them for collection in get_restricted_collections_for_recid(recid, recreate_cache_if_needed=False): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collection) if auth_code: ## Ouch! the user is not authorized to this collection return (auth_code, auth_msg) ## OK! The user is authorized. return (0, '') if is_record_in_any_collection(recid, recreate_cache_if_needed=False): ## the record is not in any restricted collection return (0, '') elif record_exists(recid) > 0: ## We are in the case where webcoll has not run. ## Let's authorize SUPERADMIN (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=None) if auth_code == 0: return (0, '') else: ## Too bad. Let's print a nice message: return (1, """The record you are trying to access has just been submitted to the system and needs to be assigned to the proper collections. It is currently restricted for security reasons until the assignment will be fully completed. Please come back later to properly access this record.""") else: ## The record either does not exists or has been deleted. ## Let's handle these situations outside of this code. return (0, '') class IndexStemmingDataCacher(DataCacher): """ Provides cache for stemming information for word/phrase indexes. This class is not to be used directly; use function get_index_stemming_language() instead. 
""" def __init__(self): def cache_filler(): try: res = run_sql("""SELECT id, stemming_language FROM idxINDEX""") except DatabaseError: # database problems, return empty cache return {} return dict(res) def timestamp_verifier(): return get_table_update_time('idxINDEX') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: index_stemming_cache.is_ok_p except Exception: index_stemming_cache = IndexStemmingDataCacher() def get_index_stemming_language(index_id, recreate_cache_if_needed=True): """Return stemming langugage for given index.""" if recreate_cache_if_needed: index_stemming_cache.recreate_cache_if_needed() return index_stemming_cache.cache[index_id] class CollectionRecListDataCacher(DataCacher): """ Provides cache for collection reclist hitsets. This class is not to be used directly; use function get_collection_reclist() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT name,reclist FROM collection") except Exception: # database problems, return empty cache return {} for name, reclist in res: ret[name] = None # this will be filled later during runtime by calling get_collection_reclist(coll) return ret def timestamp_verifier(): return get_table_update_time('collection') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_reclist_cache.is_ok_p: raise Exception except Exception: collection_reclist_cache = CollectionRecListDataCacher() def get_collection_reclist(coll, recreate_cache_if_needed=True): """Return hitset of recIDs that belong to the collection 'coll'.""" if recreate_cache_if_needed: collection_reclist_cache.recreate_cache_if_needed() if not collection_reclist_cache.cache[coll]: # not yet it the cache, so calculate it and fill the cache: set = HitSet() query = "SELECT nbrecs,reclist FROM collection WHERE name=%s" res = run_sql(query, (coll, ), 1) if res: try: set = HitSet(res[0][1]) except: pass collection_reclist_cache.cache[coll] = set # finally, return reclist: return collection_reclist_cache.cache[coll] class SearchResultsCache(DataCacher): """ Provides temporary lazy cache for Search Results. Useful when users click on `next page'. """ def __init__(self): def cache_filler(): return {} def timestamp_verifier(): return '1970-01-01 00:00:00' # lazy cache is always okay; # its filling is governed by # CFG_WEBSEARCH_SEARCH_CACHE_SIZE DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not search_results_cache.is_ok_p: raise Exception except Exception: search_results_cache = SearchResultsCache() class CollectionI18nNameDataCacher(DataCacher): """ Provides cache for I18N collection names. This class is not to be used directly; use function get_coll_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT c.name,cn.ln,cn.value FROM collectionname AS cn, collection AS c WHERE cn.id_collection=c.id AND cn.type='ln'") # ln=long name except Exception: # database problems return {} for c, ln, i18nname in res: if i18nname: if not ret.has_key(c): ret[c] = {} ret[c][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('collectionname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not collection_i18nname_cache.is_ok_p: raise Exception except Exception: collection_i18nname_cache = CollectionI18nNameDataCacher() def get_coll_i18nname(c, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted collection name (of the name type `ln' (=long name)) for collection C in language LN. 
This function uses collection_i18nname_cache, but it verifies whether the cache is up-to-date first by default. This verification step is performed by checking the DB table update time. So, if you call this function 1000 times, it can get very slow because it will do 1000 table update time verifications, even though collection names change not that often. Hence the parameter VERIFY_CACHE_TIMESTAMP which, when set to False, will assume the cache is already up-to-date. This is useful namely in the generation of collection lists for the search results page. """ if verify_cache_timestamp: collection_i18nname_cache.recreate_cache_if_needed() out = c try: out = collection_i18nname_cache.cache[c][ln] except KeyError: pass # translation in LN does not exist return out class FieldI18nNameDataCacher(DataCacher): """ Provides cache for I18N field names. This class is not to be used directly; use function get_field_i18nname() instead. """ def __init__(self): def cache_filler(): ret = {} try: res = run_sql("SELECT f.name,fn.ln,fn.value FROM fieldname AS fn, field AS f WHERE fn.id_field=f.id AND fn.type='ln'") # ln=long name except Exception: # database problems, return empty cache return {} for f, ln, i18nname in res: if i18nname: if not ret.has_key(f): ret[f] = {} ret[f][ln] = i18nname return ret def timestamp_verifier(): return get_table_update_time('fieldname') DataCacher.__init__(self, cache_filler, timestamp_verifier) try: if not field_i18nname_cache.is_ok_p: raise Exception except Exception: field_i18nname_cache = FieldI18nNameDataCacher() def get_field_i18nname(f, ln=CFG_SITE_LANG, verify_cache_timestamp=True): """ Return nicely formatted field name (of type 'ln', 'long name') for field F in language LN. If VERIFY_CACHE_TIMESTAMP is set to True, then verify DB timestamp and field I18N name cache timestamp and refresh cache from the DB if needed. Otherwise don't bother checking DB timestamp and return the cached value. (This is useful when get_field_i18nname is called inside a loop.) """ if verify_cache_timestamp: field_i18nname_cache.recreate_cache_if_needed() out = f try: out = field_i18nname_cache.cache[f][ln] except KeyError: pass # translation in LN does not exist return out def get_alphabetically_ordered_collection_list(level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" out = [] res = run_sql("SELECT id,name FROM collection ORDER BY name ASC") for c_id, c_name in res: # make a nice printable name (e.g. truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c_name, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable out.append([c_name, c_printable]) return out def get_nicely_ordered_collection_list(collid=1, level=0, ln=CFG_SITE_LANG): """Returns nicely ordered (score respected) list of collections, more exactly list of tuples (collection name, printable collection name). Suitable for create_search_box().""" colls_nicely_ordered = [] res = run_sql("""SELECT c.name,cc.id_son FROM collection_collection AS cc, collection AS c WHERE c.id=cc.id_son AND cc.id_dad=%s ORDER BY score DESC""", (collid, )) for c, cid in res: # make a nice printable name (e.g. 
truncate c_printable for # long collection names in given language): c_printable_fullname = get_coll_i18nname(c, ln, False) c_printable = wash_index_term(c_printable_fullname, 30, False) if c_printable != c_printable_fullname: c_printable = c_printable + "..." if level: c_printable = " " + level * '-' + " " + c_printable colls_nicely_ordered.append([c, c_printable]) colls_nicely_ordered = colls_nicely_ordered + get_nicely_ordered_collection_list(cid, level+1, ln=ln) return colls_nicely_ordered def get_index_id_from_field(field): """ Return index id with name corresponding to FIELD, or the first index id where the logical field code named FIELD is indexed. Return zero in case there is no index defined for this field. Example: field='author', output=4. """ out = 0 if field == '': field = 'global' # empty string field means 'global' index (field 'anyfield') # first look in the index table: res = run_sql("""SELECT id FROM idxINDEX WHERE name=%s""", (field,)) if res: out = res[0][0] return out # not found in the index table, now look in the field table: res = run_sql("""SELECT w.id FROM idxINDEX AS w, idxINDEX_field AS wf, field AS f WHERE f.code=%s AND wf.id_field=f.id AND w.id=wf.id_idxINDEX LIMIT 1""", (field,)) if res: out = res[0][0] return out def get_words_from_pattern(pattern): "Returns list of whitespace-separated words from pattern." words = {} for word in string.split(pattern): if not words.has_key(word): words[word] = 1 return words.keys() def create_basic_search_units(req, p, f, m=None, of='hb'): """Splits search pattern and search field into a list of independently searchable units. - A search unit consists of '(operator, pattern, field, type, hitset)' tuples where 'operator' is set union (|), set intersection (+) or set exclusion (-); 'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics'); 'field' is either a code like 'title' or MARC tag like '100__a'; 'type' is the search type ('w' for word file search, 'a' for access file search). - Optionally, the function accepts the match type argument 'm'. If it is set (e.g. from advanced search interface), then it performs this kind of matching. If it is not set, then a guess is made. 'm' can have values: 'a'='all of the words', 'o'='any of the words', 'p'='phrase/substring', 'r'='regular expression', 'e'='exact value'. - Warnings are printed on req (when not None) in case of HTML output formats.""" opfts = [] # will hold (o,p,f,t,h) units # FIXME: quick hack for the journal index if f == 'journal': opfts.append(['+', p, f, 'w']) return opfts ## check arguments: is desired matching type set? if m: ## A - matching type is known; good! 
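        # (Illustrative examples, not part of the original patch: with
        # m='a', p='quark gluon' and f='title', the branches below return
        #     [['+', 'quark', 'title', 'w'], ['+', 'gluon', 'title', 'w']]
        # while m='o' yields
        #     [['+', 'quark', 'title', 'w'], ['|', 'gluon', 'title', 'w']]
        # and m='p' yields
        #     [['+', '%quark gluon%', 'title', 'a']].)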
if m == 'e': # A1 - exact value: opfts.append(['+', p, f, 'a']) # '+' since we have only one unit elif m == 'p': # A2 - phrase/substring: opfts.append(['+', "%" + p + "%", f, 'a']) # '+' since we have only one unit elif m == 'r': # A3 - regular expression: opfts.append(['+', p, f, 'r']) # '+' since we have only one unit elif m == 'a' or m == 'w': # A4 - all of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): opfts.append(['+', word, f, 'w']) # '+' in all units elif m == 'o': # A5 - any of the words: p = strip_accents(p) # strip accents for 'w' mode, FIXME: delete when not needed for word in get_words_from_pattern(p): if len(opfts)==0: opfts.append(['+', word, f, 'w']) # '+' in the first unit else: opfts.append(['|', word, f, 'w']) # '|' in further units else: if of.startswith("h"): print_warning(req, "Matching type '%s' is not implemented yet." % cgi.escape(m), "Warning") opfts.append(['+', "%" + p + "%", f, 'w']) else: ## B - matching type is not known: let us try to determine it by some heuristics if f and p[0] == '"' and p[-1] == '"': ## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search opfts.append(['+', p[1:-1], f, 'a']) elif f in ('author', 'firstauthor', 'exactauthor') and author_name_requires_phrase_search(p): ## B1 - do we search in author, and does 'p' contain space/comma/dot/etc? ## => doing washed ACC search opfts.append(['+', p, f, 'a']) elif f and p[0] == "'" and p[-1] == "'": ## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search opfts.append(['+', '%' + p[1:-1] + '%', f, 'a']) elif f and p[0] == "/" and p[-1] == "/": ## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search opfts.append(['+', p[1:-1], f, 'r']) elif f and string.find(p, ',') >= 0: ## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search opfts.append(['+', p, f, 'a']) elif f and str(f[0:2]).isdigit(): ## B2 - does 'f' exist and starts by two digits? => doing ACC search opfts.append(['+', p, f, 'a']) else: ## B3 - doing WRD search, but maybe ACC too # search units are separated by spaces unless the space is within single or double quotes # so, let us replace temporarily any space within quotes by '__SPACE__' p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p) p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p) p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p) # and spaces after colon as well: p = re_pattern_spaces_after_colon.sub(lambda x: string.replace(x.group(1), ' ', '__SPACE__'), p) # wash argument: p = re_equal.sub(":", p) p = re_logical_and.sub(" ", p) p = re_logical_or.sub(" |", p) p = re_logical_not.sub(" -", p) p = re_operators.sub(r' \1', p) for pi in string.split(p): # iterate through separated units (or items, as "pi" stands for "p item") pi = re_pattern_space.sub(" ", pi) # replace back '__SPACE__' by ' ' # firstly, determine set operator if pi[0] == '+' or pi[0] == '-' or pi[0] == '|': oi = pi[0] pi = pi[1:] else: # okay, there is no operator, so let us decide what to do by default oi = '+' # by default we are doing set intersection... 
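                # (Illustrative example, not part of the original patch:
                # for p = 'muon +decay -tau' the loop sees the items
                # 'muon', '+decay' and '-tau', and sets oi to '+', '+'
                # and '-' respectively; a bare item defaults to '+',
                # i.e. set intersection.)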
                # secondly, determine search pattern and field:
                if string.find(pi, ":") > 0:
                    fi, pi = string.split(pi, ":", 1)
                    fi = wash_field(fi)
                    # test whether fi is a real index code or a MARC-tag defined code:
                    if fi in get_fieldcodes() or '00' <= fi[:2] <= '99':
                        pass
                    else:
                        # it is not, so join it back:
                        fi, pi = f, fi + ":" + pi
                else:
                    fi, pi = f, pi
                # wash 'fi' argument:
                fi = wash_field(fi)
                # wash 'pi' argument:
                pi = pi.strip() # strip eventual spaces
                if re_quotes.match(pi):
                    # B3a - quotes are found => do ACC search (phrase search)
                    if pi[0] == '"' and pi[-1] == '"':
                        pi = string.replace(pi, '"', '') # remove quote signs
                        opfts.append([oi, pi, fi, 'a'])
                    elif pi[0] == "'" and pi[-1] == "'":
                        pi = string.replace(pi, "'", "") # remove quote signs
                        opfts.append([oi, "%" + pi + "%", fi, 'a'])
                    else:
                        # unbalanced quotes, so fall back to WRD query:
                        opfts.append([oi, pi, fi, 'w'])
                elif pi.startswith('/') and pi.endswith('/'):
                    # B3b - pi has slashes around => do regexp search
                    opfts.append([oi, pi[1:-1], fi, 'r'])
                elif fi and str(fi[0:2]).isdigit():
                    # B3c - fi exists and starts by two digits => do ACC search
                    opfts.append([oi, pi, fi, 'a'])
                elif fi and not get_index_id_from_field(fi) and get_field_name(fi):
                    # B3d - logical field fi exists but there is no WRD index for fi => try ACC search
                    opfts.append([oi, pi, fi, 'a'])
                else:
                    # B3e - general case => do WRD search
                    pi = strip_accents(pi) # strip accents for 'w' mode, FIXME: delete when not needed
                    for pii in get_words_from_pattern(pi):
                        opfts.append([oi, pii, fi, 'w'])
    ## sanity check:
    for i in range(0, len(opfts)):
        try:
            pi = opfts[i][1]
            if pi == '*':
                if of.startswith("h"):
                    print_warning(req, "Ignoring standalone wildcard word.", "Warning")
                del opfts[i]
            if pi == '' or pi == ' ':
                fi = opfts[i][2]
                if fi:
                    if of.startswith("h"):
                        print_warning(req, "Ignoring empty %s search term." % fi, "Warning")
                del opfts[i]
        except:
            pass
    ## replace old logical field names if applicable:
    if CFG_WEBSEARCH_FIELDS_CONVERT:
        opfts = [[o,p,wash_field(f),t] for o,p,f,t in opfts]
    ## return search units:
    return opfts

def page_start(req, of, cc, aas, ln, uid, title_message=None,
               description='', keywords='', recID=-1, tab='', p=''):
    "Start page according to given output format."
    _ = gettext_set_language(ln)
    if not req or isinstance(req, cStringIO.OutputType):
        return # we were called from CLI
    if not title_message:
        title_message = _("Search Results")
    content_type = get_output_format_content_type(of)
    if of.startswith('x'):
        if of == 'xr':
            # we are doing RSS output
            req.content_type = "application/rss+xml"
            req.send_http_header()
            req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
        else:
            # we are doing XML output:
            req.content_type = "text/xml"
            req.send_http_header()
            req.write("""<?xml version="1.0" encoding="UTF-8"?>\n""")
    elif of.startswith('t') or str(of[0:3]).isdigit():
        # we are doing plain text output:
        req.content_type = "text/plain"
        req.send_http_header()
    elif of == "id":
        pass # nothing to do, we shall only return list of recIDs
    elif content_type == 'text/html':
        # we are doing HTML output:
        req.content_type = "text/html"
        req.send_http_header()
        if not description:
            description = "%s %s." % (cc, _("Search Results"))
        if not keywords:
            keywords = "%s, WebSearch, %s" % (get_coll_i18nname(CFG_SITE_NAME, ln, False), get_coll_i18nname(cc, ln, False))
        ## generate RSS URL:
        argd = {}
        if req.args:
            argd = cgi.parse_qs(req.args)
        rssurl = websearch_templates.build_rss_url(argd)
        ## add MathJax if displaying single records (FIXME: find
        ## eventual better place to this code)
        if of.lower() in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
-            metaheaderadd = """
-
-"""
+            metaheaderadd = get_mathjax_header()
        else:
            metaheaderadd = ''
        ## generate navtrail:
        navtrail = create_navtrail_links(cc, aas, ln)
        if navtrail != '':
            navtrail += ' &gt; '
        if (tab != '' or ((of != '' or of.lower() != 'hd') and of != 'hb')) and \
               recID != -1:
            # If we are not in information tab in HD format, customize
            # the nav. trail to have a link back to main record. (Due
            # to the way perform_request_search() works, hb
            # (lowercase) is equal to hd)
            navtrail += ' <a class="navtrail" href="%s/record/%s">%s</a>' % \
                        (CFG_SITE_URL, recID, title_message)
            if (of != '' or of.lower() != 'hd') and of != 'hb':
                # Export
                format_name = of
                query = "SELECT name FROM format WHERE code=%s"
                res = run_sql(query, (of,))
                if res:
                    format_name = res[0][0]
                navtrail += ' &gt; ' + format_name
            else:
                # Discussion, citations, etc. tabs
                tab_label = get_detailed_page_tabs(cc, ln=ln)[tab]['label']
                navtrail += ' &gt; ' + _(tab_label)
        else:
            navtrail += title_message
        if p:
            # we are serving search/browse results pages, so insert pattern:
            navtrail += ": " + cgi.escape(p)
            title_message = cgi.escape(p) + " - " + title_message
        ## finally, print page header:
        req.write(pageheaderonly(req=req, title=title_message,
                                 navtrail=navtrail,
                                 description=description,
                                 keywords=keywords,
                                 metaheaderadd=metaheaderadd,
                                 uid=uid,
                                 language=ln,
                                 navmenuid='search',
                                 navtrail_append_title_p=0,
                                 rssurl=rssurl))
        req.write(websearch_templates.tmpl_search_pagestart(ln=ln))
    #else:
    #    req.send_http_header()

def page_end(req, of="hb", ln=CFG_SITE_LANG):
    "End page according to given output format: e.g. close XML tags, add HTML footer, etc."
    if of == "id":
        return [] # empty recID list
    if not req:
        return # we were called from CLI
    if of.startswith('h'):
        req.write(websearch_templates.tmpl_search_pageend(ln = ln)) # pagebody end
    req.write(pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req))
    return

def create_page_title_search_pattern_info(p, p1, p2, p3):
    """Create the search pattern bit for the page <title> web page
    HTML header.  Basically combine p and (p1,p2,p3) together so that
    the page header may be filled whether we are in the Simple Search
    or Advanced Search interface contexts."""
    out = ""
    if p:
        out = p
    else:
        out = p1
        if p2:
            out += ' ' + p2
        if p3:
            out += ' ' + p3
    return out

def create_inputdate_box(name="d1", selected_year=0, selected_month=0, selected_day=0, ln=CFG_SITE_LANG):
    "Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options."
_ = gettext_set_language(ln) box = "" # day box += """<select name="%sd">""" % name box += """<option value="">%s""" % _("any day") for day in range(1, 32): box += """<option value="%02d"%s>%02d""" % (day, is_selected(day, selected_day), day) box += """</select>""" # month box += """<select name="%sm">""" % name box += """<option value="">%s""" % _("any month") for mm, month in [(1, _("January")), (2, _("February")), (3, _("March")), (4, _("April")), \ (5, _("May")), (6, _("June")), (7, _("July")), (8, _("August")), \ (9, _("September")), (10, _("October")), (11, _("November")), (12, _("December"))]: box += """<option value="%02d"%s>%s""" % (mm, is_selected(mm, selected_month), month) box += """</select>""" # year box += """<select name="%sy">""" % name box += """<option value="">%s""" % _("any year") this_year = int(time.strftime("%Y", time.localtime())) for year in range(this_year-20, this_year+1): box += """<option value="%d"%s>%d""" % (year, is_selected(year, selected_year), year) box += """</select>""" return box def create_search_box(cc, colls, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action=""): """Create search box for 'search again in the results page' functionality.""" # load the right message language _ = gettext_set_language(ln) # some computations cc_intl = get_coll_i18nname(cc, ln, False) cc_colID = get_colID(cc) colls_nicely_ordered = [] if cfg_nicely_ordered_collection_list: colls_nicely_ordered = get_nicely_ordered_collection_list(ln=ln) else: colls_nicely_ordered = get_alphabetically_ordered_collection_list(ln=ln) colls_nice = [] for (cx, cx_printable) in colls_nicely_ordered: if not cx.startswith("Unnamed collection"): colls_nice.append({ 'value' : cx, 'text' : cx_printable }) coll_selects = [] if colls and colls[0] != CFG_SITE_NAME: # some collections are defined, so print these first, and only then print 'add another collection' heading: for c in colls: if c: temp = [] temp.append({ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }) # this field is used to remove the current collection from the ones to be searched. temp.append({ 'value' : '', 'text' : '*** %s ***' % _("remove this collection") }) for val in colls_nice: # print collection: if not cx.startswith("Unnamed collection"): temp.append({ 'value' : val['value'], 'text' : val['text'], 'selected' : (c == re.sub("^[\s\-]*","", val['value'])) }) coll_selects.append(temp) coll_selects.append([{ 'value' : '', 'text' : '*** %s ***' % _("add another collection") }] + colls_nice) else: # we searched in CFG_SITE_NAME, so print 'any public collection' heading coll_selects.append([{ 'value' : CFG_SITE_NAME, 'text' : '*** %s ***' % _("any public collection") }] + colls_nice) ## ranking methods ranks = [{ 'value' : '', 'text' : "- %s %s -" % (_("OR").lower (), _("rank by")), }] for (code, name) in get_bibrank_methods(cc_colID, ln): # propose found rank methods: ranks.append({ 'value' : code, 'text' : name, }) formats = [] query = """SELECT code,name FROM format WHERE visibility='1' ORDER BY name ASC""" res = run_sql(query) if res: # propose found formats: for code, name in res: formats.append({ 'value' : code, 'text' : name }) else: formats.append({'value' : 'hb', 'text' : _("HTML brief") }) # show collections in the search box? 
(not if there is only one # collection defined, and not if we are in light search) show_colls = True show_title = True if len(collection_reclist_cache.cache.keys()) == 1 or \ aas == -1: show_colls = False show_title = False if cc == CFG_SITE_NAME: show_title = False return websearch_templates.tmpl_search_box( ln = ln, aas = aas, cc_intl = cc_intl, cc = cc, ot = ot, sp = sp, action = action, fieldslist = get_searchwithin_fields(ln=ln, colID=cc_colID), f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, p1 = p1, p2 = p2, p3 = p3, op1 = op1, op2 = op2, rm = rm, p = p, f = f, coll_selects = coll_selects, d1y = d1y, d2y = d2y, d1m = d1m, d2m = d2m, d1d = d1d, d2d = d2d, dt = dt, sort_fields = get_sortby_fields(ln=ln, colID=cc_colID), sf = sf, so = so, ranks = ranks, sc = sc, rg = rg, formats = formats, of = of, pl = pl, jrec = jrec, ec = ec, show_colls = show_colls, show_title = show_title, ) def create_navtrail_links(cc=CFG_SITE_NAME, aas=0, ln=CFG_SITE_LANG, self_p=1, tab=''): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in get_coll_ancestors(cc): if dad != CFG_SITE_NAME: # exclude Home collection dads.append ((dad, get_coll_i18nname(dad, ln, False))) if self_p and cc != CFG_SITE_NAME: dads.append((cc, get_coll_i18nname(cc, ln, False))) return websearch_templates.tmpl_navtrail_links( aas=aas, ln=ln, dads=dads) def get_searchwithin_fields(ln='en', colID=None): """Retrieves the fields name used in the 'search within' selection box for the collection ID colID.""" res = None if colID: res = run_sql("""SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,)) if not res: res = run_sql("SELECT code,name FROM field ORDER BY name ASC") fields = [{ 'value' : '', 'text' : get_field_i18nname("any field", ln, False) }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def get_sortby_fields(ln='en', colID=None): """Retrieves the fields name used in the 'sort by' selection box for the collection ID colID.""" _ = gettext_set_language(ln) res = None if colID: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (colID,)) if not res: # no sort fields defined for this colID, try to take Home collection: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_collection=%s AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""", (1,)) if not res: # no sort fields defined for the Home collection, take all sort fields defined wherever they are: res = run_sql("""SELECT DISTINCT(f.code),f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""",) fields = [{ 'value' : '', 'text' : _("latest first") }] for field_code, field_name in res: if field_code and field_code != "anyfield": fields.append({ 'value' : field_code, 'text' : get_field_i18nname(field_name, ln, False) }) return fields def create_andornot_box(name='op', value='', ln='en'): "Returns HTML code for the AND/OR/NOT selection box." 
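    # (Illustrative note, not part of the original patch: with name='op1'
    # and value='n', the helper below emits <select name="op1"> in which
    # only the "AND NOT" option carries the ' selected' attribute, since
    # is_selected('n', 'n') returns " selected" while the other two
    # comparisons return "".)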
_ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="n"%s>%s </select> """ % (name, is_selected('a', value), _("AND"), is_selected('o', value), _("OR"), is_selected('n', value), _("AND NOT")) return out def create_matchtype_box(name='m', value='', ln='en'): "Returns HTML code for the 'match type' selection box." _ = gettext_set_language(ln) out = """ <select name="%s"> <option value="a"%s>%s <option value="o"%s>%s <option value="e"%s>%s <option value="p"%s>%s <option value="r"%s>%s </select> """ % (name, is_selected('a', value), _("All of the words:"), is_selected('o', value), _("Any of the words:"), is_selected('e', value), _("Exact phrase:"), is_selected('p', value), _("Partial phrase:"), is_selected('r', value), _("Regular expression:")) return out def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if type(var) is int and type(fld) is int: if var == fld: return " selected" elif str(var) == str(fld): return " selected" elif fld and len(fld)==3 and fld[0] == "w" and var == fld[1:]: return " selected" return "" def wash_colls(cc, c, split_colls=0, verbose=0): """Wash collection list by checking whether user has deselected anything under 'Narrow search'. Checks also if cc is a list or not. Return list of cc, colls_to_display, colls_to_search since the list of collections to display is different from that to search in. This is because users might have chosen 'split by collection' functionality. The behaviour of "collections to display" depends solely whether user has deselected a particular collection: e.g. if it started from 'Articles and Preprints' page, and deselected 'Preprints', then collection to display is 'Articles'. If he did not deselect anything, then collection to display is 'Articles & Preprints'. The behaviour of "collections to search in" depends on the 'split_colls' parameter: * if is equal to 1, then we can wash the colls list down and search solely in the collection the user started from; * if is equal to 0, then we are splitting to the first level of collections, i.e. collections as they appear on the page we started to search from; The function raises exception InvenioWebSearchUnknownCollectionError if cc or one of c collections is not known. 
""" colls_out = [] colls_out_for_display = [] # list to hold the hosted collections to be searched and displayed hosted_colls_out = [] debug = "" if verbose: debug += "<br />" debug += "<br />1) --- initial parameters ---" debug += "<br />cc : %s" % cc debug += "<br />c : %s" % c debug += "<br />" # check what type is 'cc': if type(cc) is list: for ci in cc: if collection_reclist_cache.cache.has_key(ci): # yes this collection is real, so use it: cc = ci break else: # check once if cc is real: if not collection_reclist_cache.cache.has_key(cc): if cc: raise InvenioWebSearchUnknownCollectionError(cc) else: cc = CFG_SITE_NAME # cc is not set, so replace it with Home collection # check type of 'c' argument: if type(c) is list: colls = c else: colls = [c] if verbose: debug += "<br />2) --- after check for the integrity of cc and the being or not c a list ---" debug += "<br />cc : %s" % cc debug += "<br />c : %s" % c debug += "<br />" # remove all 'unreal' collections: colls_real = [] for coll in colls: if collection_reclist_cache.cache.has_key(coll): colls_real.append(coll) else: if coll: raise InvenioWebSearchUnknownCollectionError(coll) colls = colls_real if verbose: debug += "<br />3) --- keeping only the real colls of c ---" debug += "<br />colls : %s" % colls debug += "<br />" # check if some real collections remain: if len(colls)==0: colls = [cc] if verbose: debug += "<br />4) --- in case no colls were left we use cc directly ---" debug += "<br />colls : %s" % colls debug += "<br />" # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll': res = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r'""", (cc,)) # list that holds all the non restricted sons of cc that are also not hosted collections l_cc_nonrestricted_sons_and_nonhosted_colls = [] res_hosted = run_sql("""SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name=%s AND cc.type='r' AND (c.dbquery NOT LIKE 'hostedcollection:%%' OR c.dbquery IS NULL)""", (cc,)) for row_hosted in res_hosted: l_cc_nonrestricted_sons_and_nonhosted_colls.append(row_hosted[0]) l_cc_nonrestricted_sons_and_nonhosted_colls.sort() l_cc_nonrestricted_sons = [] l_c = colls for row in res: if not collection_restricted_p(row[0]): l_cc_nonrestricted_sons.append(row[0]) l_c.sort() l_cc_nonrestricted_sons.sort() if l_cc_nonrestricted_sons == l_c: colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc' # the following elif is a hack that preserves the above funcionality when we start searching from # the frontpage with some hosted collections deselected (either by default or manually) elif set(l_cc_nonrestricted_sons_and_nonhosted_colls).issubset(set(l_c)): colls_out_for_display = colls split_colls = 0 else: colls_out_for_display = colls # nope, we need to display all 'colls' successively # remove duplicates: #colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1)) #colls_out_for_display = map(lambda x, colls_out_for_display=colls_out_for_display:colls_out_for_display[x-1], colls_out_for_display_nondups) colls_out_for_display = list(set(colls_out_for_display)) if verbose: debug += "<br />5) --- decide whether colls_out_for_diplay should be colls or is it sufficient for it 
to be cc; remove duplicates ---" debug += "<br />colls_out_for_display : %s" % colls_out_for_display debug += "<br />" # the following piece of code takes care of removing collections whose ancestors are going to be searched anyway # list to hold the collections to be removed colls_to_be_removed = [] # first calculate the collections that can safely be removed for coll in colls_out_for_display: for ancestor in get_coll_ancestors(coll): #if ancestor in colls_out_for_display: colls_to_be_removed.append(coll) if ancestor in colls_out_for_display and not is_hosted_collection(coll): colls_to_be_removed.append(coll) # secondly remove the collections for coll in colls_to_be_removed: colls_out_for_display.remove(coll) if verbose: debug += "<br />6) --- remove collections that have ancestors about to be search, unless they are hosted ---" debug += "<br />colls_out_for_display : %s" % colls_out_for_display debug += "<br />" # calculate the hosted collections to be searched. if colls_out_for_display == [cc]: if is_hosted_collection(cc): hosted_colls_out.append(cc) else: for coll in get_coll_sons(cc): if is_hosted_collection(coll): hosted_colls_out.append(coll) else: for coll in colls_out_for_display: if is_hosted_collection(coll): hosted_colls_out.append(coll) if verbose: debug += "<br />7) --- calculate the hosted_colls_out ---" debug += "<br />hosted_colls_out : %s" % hosted_colls_out debug += "<br />" # second, let us decide on collection splitting: if split_colls == 0: # type A - no sons are wanted colls_out = colls_out_for_display else: # type B - sons (first-level descendants) are wanted for coll in colls_out_for_display: coll_sons = get_coll_sons(coll) if coll_sons == []: colls_out.append(coll) else: for coll_son in coll_sons: if not is_hosted_collection(coll_son): colls_out.append(coll_son) #else: # colls_out = colls_out + coll_sons # remove duplicates: #colls_out_nondups=filter(lambda x, colls_out=colls_out: colls_out[x-1] not in colls_out[x:], range(1, len(colls_out)+1)) #colls_out = map(lambda x, colls_out=colls_out:colls_out[x-1], colls_out_nondups) colls_out = list(set(colls_out)) if verbose: debug += "<br />8) --- calculate the colls_out; remove duplicates ---" debug += "<br />colls_out : %s" % colls_out debug += "<br />" # remove the hosted collections from the collections to be searched if hosted_colls_out: for coll in hosted_colls_out: try: colls_out.remove(coll) except ValueError: # in case coll was not found in colls_out pass if verbose: debug += "<br />9) --- remove the hosted_colls from the colls_out ---" debug += "<br />colls_out : %s" % colls_out return (cc, colls_out_for_display, colls_out, hosted_colls_out, debug) def strip_accents(x): """Strip accents in the input phrase X (assumed in UTF-8) by replacing accented characters with their unaccented cousins (e.g. é by e). 
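# NOTE (editor): illustrative sketch of the washing above, for a
# hypothetical site whose Home collection has exactly the two
# non-restricted, non-hosted sons 'Articles' and 'Books':
#
#   wash_colls(cc=CFG_SITE_NAME, c=['Articles', 'Books'], split_colls=0)
#   -> colls_out_for_display collapses to [CFG_SITE_NAME], since the
#      selected sons cover all of cc; any hosted sons would be returned
#      separately in hosted_colls_out.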
def strip_accents(x):
    """Strip accents in the input phrase X (assumed in UTF-8) by replacing
    accented characters with their unaccented cousins (e.g. é by e).
    Return such a stripped X."""
    x = re_latex_lowercase_a.sub("a", x)
    x = re_latex_lowercase_ae.sub("ae", x)
    x = re_latex_lowercase_e.sub("e", x)
    x = re_latex_lowercase_i.sub("i", x)
    x = re_latex_lowercase_o.sub("o", x)
    x = re_latex_lowercase_u.sub("u", x)
    x = re_latex_lowercase_y.sub("y", x)
    x = re_latex_lowercase_c.sub("c", x)
    x = re_latex_lowercase_n.sub("n", x)
    x = re_latex_uppercase_a.sub("A", x)
    x = re_latex_uppercase_ae.sub("AE", x)
    x = re_latex_uppercase_e.sub("E", x)
    x = re_latex_uppercase_i.sub("I", x)
    x = re_latex_uppercase_o.sub("O", x)
    x = re_latex_uppercase_u.sub("U", x)
    x = re_latex_uppercase_y.sub("Y", x)
    x = re_latex_uppercase_c.sub("C", x)
    x = re_latex_uppercase_n.sub("N", x)
    # convert input into Unicode string:
    try:
        y = unicode(x, "utf-8")
    except:
        return x # something went wrong, probably the input wasn't UTF-8
    # asciify Latin-1 lowercase characters:
    y = re_unicode_lowercase_a.sub("a", y)
    y = re_unicode_lowercase_ae.sub("ae", y)
    y = re_unicode_lowercase_e.sub("e", y)
    y = re_unicode_lowercase_i.sub("i", y)
    y = re_unicode_lowercase_o.sub("o", y)
    y = re_unicode_lowercase_u.sub("u", y)
    y = re_unicode_lowercase_y.sub("y", y)
    y = re_unicode_lowercase_c.sub("c", y)
    y = re_unicode_lowercase_n.sub("n", y)
    # asciify Latin-1 uppercase characters:
    y = re_unicode_uppercase_a.sub("A", y)
    y = re_unicode_uppercase_ae.sub("AE", y)
    y = re_unicode_uppercase_e.sub("E", y)
    y = re_unicode_uppercase_i.sub("I", y)
    y = re_unicode_uppercase_o.sub("O", y)
    y = re_unicode_uppercase_u.sub("U", y)
    y = re_unicode_uppercase_y.sub("Y", y)
    y = re_unicode_uppercase_c.sub("C", y)
    y = re_unicode_uppercase_n.sub("N", y)
    # return UTF-8 representation of the Unicode string:
    return y.encode("utf-8")

def wash_index_term(term, max_char_length=50, lower_term=True):
    """
    Return washed form of the index term TERM that would be suitable
    for storing into idxWORD* tables.  I.e., lower the TERM if
    LOWER_TERM is True, and truncate it safely to MAX_CHAR_LENGTH
    UTF-8 characters (meaning, in principle, 4*MAX_CHAR_LENGTH bytes).

    The function works by an internal conversion of TERM, when needed,
    from its input Python UTF-8 binary string format into Python
    Unicode format, and then truncating it safely to the given number
    of UTF-8 characters, without possible mis-truncation in the middle
    of a multi-byte UTF-8 character that could otherwise happen if we
    would have been working with UTF-8 binary representation directly.

    Note that MAX_CHAR_LENGTH corresponds to the length of the term
    column in idxINDEX* tables.
    """
    if lower_term:
        washed_term = unicode(term, 'utf-8').lower()
    else:
        washed_term = unicode(term, 'utf-8')
    if len(washed_term) <= max_char_length:
        # no need to truncate the term, because it will fit
        # nicely even if it uses four-byte UTF-8 characters
        return washed_term.encode('utf-8')
    else:
        # truncate the term in a safe position:
        return washed_term[:max_char_length].encode('utf-8')

def lower_index_term(term):
    """
    Return safely lowered index term TERM.  This is done by converting
    to UTF-8 first, because standard Python lower() function is not
    UTF-8 safe.  To be called by both the search engine and the
    indexer when appropriate (e.g. before stemming).

    In case of problems with UTF-8 compliance, this function raises
    UnicodeDecodeError, so the client code may want to catch it.
    """
    return unicode(term, 'utf-8').lower().encode('utf-8')
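# NOTE (editor): doctest-style sketch of the two washing helpers above;
# the 'résumé' input is hypothetical but follows the documented é->e rule:
#
#   >>> strip_accents('résumé')
#   'resume'
#   >>> wash_index_term('A' * 60) == 'a' * 50   # lowercased, safely truncated
#   True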
def get_synonym_terms(term, kbr_name, match_type):
    """
    Return list of synonyms for TERM by looking in KBR_NAME in
    MATCH_TYPE style.

    @param term: search-time term or index-time term
    @type term: str
    @param kbr_name: knowledge base name
    @type kbr_name: str
    @param match_type: specifies how the term matches against the KBR
        before doing the lookup.  Could be 'exact' (default),
        'leading_to_comma', 'leading_to_number'.
    @type match_type: str
    @return: list of term synonyms
    @rtype: list of strings
    """
    dterms = {}
    ## exact match is default:
    term_for_lookup = term
    term_remainder = ''
    ## but maybe match different term:
    if match_type == 'leading_to_comma':
        mmm = re.match(r'^(.*?)(\s*,.*)$', term)
        if mmm:
            term_for_lookup = mmm.group(1)
            term_remainder = mmm.group(2)
    elif match_type == 'leading_to_number':
        mmm = re.match(r'^(.*?)(\s*\d.*)$', term)
        if mmm:
            term_for_lookup = mmm.group(1)
            term_remainder = mmm.group(2)
    ## FIXME: workaround: escaping SQL wild-card signs, since KBR's
    ## exact search is doing LIKE query, so would match everything:
    term_for_lookup = term_for_lookup.replace('%', '\%')
    ## OK, now find synonyms:
    for kbr_values in get_kbr_values(kbr_name,
                                     searchkey=term_for_lookup,
                                     searchtype='e'):
        for kbr_value in kbr_values:
            dterms[kbr_value + term_remainder] = 1
    ## return list of term synonyms:
    return dterms.keys()

def wash_output_format(format):
    """Wash output format FORMAT.  Currently only prevents input like
    'of=9' for backwards-compatible format that prints certain fields
    only.  (for this task, 'of=tm' is preferred)"""
    if str(format[0:3]).isdigit() and len(format) != 6:
        # asked to print MARC tags, but not enough digits,
        # so let's switch back to HTML brief default
        return 'hb'
    else:
        return format

def wash_pattern(p):
    """Wash pattern passed by URL.  Check for sanity of the wildcard by
    removing wildcards if they are appended to extremely short words
    (1-3 letters).  TODO: instead of this approximative treatment, it
    would be better to introduce a time limit, e.g. to kill a query if
    it does not finish within 10 seconds."""
    # strip accents:
    # p = strip_accents(p) # FIXME: when available, strip accents all the time
    # add leading/trailing whitespace for the two following wildcard-sanity checking regexps:
    p = " " + p + " "
    # replace spaces within quotes by __SPACE__ temporarily:
    p = re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
    p = re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", p)
    p = re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", p)
    # get rid of unquoted wildcards after spaces:
    p = re_pattern_wildcards_after_spaces.sub("\\1", p)
    # get rid of extremely short words (1-3 letters with wildcards):
    #p = re_pattern_short_words.sub("\\1", p)
    # replace back __SPACE__ by spaces:
    p = re_pattern_space.sub(" ", p)
    # replace special terms:
    p = re_pattern_today.sub(time.strftime("%Y-%m-%d", time.localtime()), p)
    # remove unnecessary whitespace:
    p = string.strip(p)
    # remove potentially wrong UTF-8 characters:
    p = wash_for_utf8(p)
    return p
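# NOTE (editor): hypothetical sketch of the match_type modes above, assuming
# a journal-name knowledge base that maps 'Phys. Rev. D' to 'Physical Review D':
#
#   get_synonym_terms('Phys. Rev. D 41', 'JOURNALS', 'leading_to_number')
#   -> the lookup key becomes 'Phys. Rev. D', the remainder ' 41' is kept
#      and re-appended, yielding e.g. ['Physical Review D 41'].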
def wash_field(f):
    """Wash field passed by URL."""
    if f:
        # get rid of unnecessary whitespace and make it lowercase
        # (e.g. Author -> author) to better suit iPhone etc input
        # mode:
        f = f.strip().lower()
    # wash legacy 'f' field names, e.g. replace 'wau' or 'au' by
    # 'author', if applicable:
    if CFG_WEBSEARCH_FIELDS_CONVERT:
        f = CFG_WEBSEARCH_FIELDS_CONVERT.get(f, f)
    return f

def wash_dates(d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0):
    """
    Take user-submitted date arguments D1 (full datetime string) or
    (D1Y, D1M, D1D) year, month, day tuple and D2 or (D2Y, D2M, D2D)
    and return two datetime strings in the YYYY-MM-DD HH:MM:SS format
    suitable for time-restricted searching.

    Note that when both D1 and (D1Y, D1M, D1D) parameters are present,
    the precedence goes to D1.  Ditto for D2*.

    Note that when (D1Y, D1M, D1D) are taken into account, some values
    may be missing and are completed e.g. to 01 or 12 according to
    whether it is the starting or the ending date.
    """
    datetext1, datetext2 = "", ""
    # sanity checking:
    if d1 == "" and d1y == 0 and d1m == 0 and d1d == 0 and d2 == "" and d2y == 0 and d2m == 0 and d2d == 0:
        return ("", "") # nothing selected, so return empty values
    # wash first (starting) date:
    if d1:
        # full datetime string takes precedence:
        datetext1 = d1
    else:
        # okay, first date passed as (year,month,day):
        if d1y:
            datetext1 += "%04d" % d1y
        else:
            datetext1 += "0000"
        if d1m:
            datetext1 += "-%02d" % d1m
        else:
            datetext1 += "-01"
        if d1d:
            datetext1 += "-%02d" % d1d
        else:
            datetext1 += "-01"
        datetext1 += " 00:00:00"
    # wash second (ending) date:
    if d2:
        # full datetime string takes precedence:
        datetext2 = d2
    else:
        # okay, second date passed as (year,month,day):
        if d2y:
            datetext2 += "%04d" % d2y
        else:
            datetext2 += "9999"
        if d2m:
            datetext2 += "-%02d" % d2m
        else:
            datetext2 += "-12"
        if d2d:
            datetext2 += "-%02d" % d2d
        else:
            datetext2 += "-31" # NOTE: perhaps we should add max(datenumber) in
                               # given month, but for our querying it's not
                               # needed, 31 will always do
        datetext2 += " 00:00:00"
    # okay, return constructed YYYY-MM-DD HH:MM:SS datetexts:
    return (datetext1, datetext2)

def is_hosted_collection(coll):
    """Check if the given collection is a hosted one; i.e. its dbquery
    starts with hostedcollection:
    Returns True if it is, False if it's not or if the result is empty
    or if the query failed"""
    res = run_sql("SELECT dbquery FROM collection WHERE name=%s", (coll, ))
    try:
        return res[0][0].startswith("hostedcollection:")
    except:
        return False

def get_colID(c):
    "Return collection ID for collection name C.  Return None if no match found."
    colID = None
    res = run_sql("SELECT id FROM collection WHERE name=%s", (c,), 1)
    if res:
        colID = res[0][0]
    return colID

def get_coll_ancestors(coll):
    "Returns a list of ancestors for collection 'coll'."
    coll_ancestors = []
    coll_ancestor = coll
    while 1:
        res = run_sql("""SELECT c.name FROM collection AS c
                          LEFT JOIN collection_collection AS cc ON c.id=cc.id_dad
                          LEFT JOIN collection AS ccc ON ccc.id=cc.id_son
                          WHERE ccc.name=%s ORDER BY cc.id_dad ASC LIMIT 1""",
                      (coll_ancestor,))
        if res:
            coll_name = res[0][0]
            coll_ancestors.append(coll_name)
            coll_ancestor = coll_name
        else:
            break
    # ancestors found, return reversed list:
    coll_ancestors.reverse()
    return coll_ancestors
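# NOTE (editor): doctest-style sketch of the date completion above; missing
# fields are padded towards 01/01 for the start date and towards year 9999,
# month 12, day 31 for the open-ended ending date:
#
#   >>> wash_dates(d1y=2004, d1m=3)
#   ('2004-03-01 00:00:00', '9999-12-31 00:00:00')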
""" coll_sons = [] query = "SELECT c.name FROM collection AS c "\ "LEFT JOIN collection_collection AS cc ON c.id=cc.id_son "\ "LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad "\ "WHERE cc.type=%s AND ccc.name=%s" query += " ORDER BY cc.score DESC" res = run_sql(query, (type, coll)) for name in res: if not public_only or not collection_restricted_p(name[0]): coll_sons.append(name[0]) return coll_sons def get_coll_real_descendants(coll, type='_', get_hosted_colls=True): """Return a list of all descendants of collection 'coll' that are defined by a 'dbquery'. IOW, we need to decompose compound collections like "A & B" into "A" and "B" provided that "A & B" has no associated database query defined. """ coll_sons = [] res = run_sql("""SELECT c.name,c.dbquery FROM collection AS c LEFT JOIN collection_collection AS cc ON c.id=cc.id_son LEFT JOIN collection AS ccc ON ccc.id=cc.id_dad WHERE ccc.name=%s AND cc.type LIKE %s ORDER BY cc.score DESC""", (coll, type,)) for name, dbquery in res: if dbquery: # this is 'real' collection, so return it: if get_hosted_colls: coll_sons.append(name) else: if not dbquery.startswith("hostedcollection:"): coll_sons.append(name) else: # this is 'composed' collection, so recurse: coll_sons.extend(get_coll_real_descendants(name)) return coll_sons def browse_pattern(req, colls, p, f, rg, ln=CFG_SITE_LANG): """Browse either biliographic phrases or words indexes, and display it.""" # load the right message language _ = gettext_set_language(ln) ## is p enclosed in quotes? (coming from exact search) if p.startswith('"') and p.endswith('"'): p = p[1:-1] p_orig = p ## okay, "real browse" follows: ## FIXME: the maths in the get_nearest_terms_in_bibxxx is just a test if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) ## do we search in words indexes? if not f: return browse_in_bibwords(req, p, f) index_id = get_index_id_from_field(f) if index_id != 0: coll = HitSet() for coll_name in colls: coll |= get_collection_reclist(coll_name) browsed_phrases_in_colls = get_nearest_terms_in_idxphrase_with_collection(p, index_id, rg/2, rg/2, coll) else: browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) while not browsed_phrases: # try again and again with shorter and shorter pattern: try: p = p[:-1] browsed_phrases = get_nearest_terms_in_bibxxx(p, f, (rg+1)/2+1, (rg-1)/2+1) except: # probably there are no hits at all: req.write(_("No values found.")) return ## try to check hits in these particular collection selection: browsed_phrases_in_colls = [] if 0: for phrase in browsed_phrases: phrase_hitset = HitSet() phrase_hitsets = search_pattern("", phrase, f, 'e') for coll in colls: phrase_hitset.union_update(phrase_hitsets[coll]) if len(phrase_hitset) > 0: # okay, this phrase has some hits in colls, so add it: browsed_phrases_in_colls.append([phrase, len(phrase_hitset)]) ## were there hits in collections? if browsed_phrases_in_colls == []: if browsed_phrases != []: #print_warning(req, """<p>No match close to <em>%s</em> found in given collections. 
                #Please try different term.<p>Displaying matches in any collection...""" % p_orig)
                ## try to get nbhits for these phrases in any collection:
                for phrase in browsed_phrases:
                    browsed_phrases_in_colls.append([phrase, get_nbhits_in_bibxxx(phrase, f)])

    ## display results now:
    out = websearch_templates.tmpl_browse_pattern(
            f=f,
            fn=get_field_i18nname(get_field_name(f) or f, ln, False),
            ln=ln,
            browsed_phrases_in_colls=browsed_phrases_in_colls,
            colls=colls,
            rg=rg,
          )
    req.write(out)
    return

def browse_in_bibwords(req, p, f, ln=CFG_SITE_LANG):
    """Browse inside words indexes."""
    if not p:
        return
    _ = gettext_set_language(ln)

    urlargd = {}
    urlargd.update(req.argd)
    urlargd['action'] = 'search'

    nearest_box = create_nearest_terms_box(urlargd, p, f, 'w', ln=ln, intro_text_p=0)

    req.write(websearch_templates.tmpl_search_in_bibwords(
        p = p,
        f = f,
        ln = ln,
        nearest_box = nearest_box
    ))
    return

def search_pattern(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
    """Search for complex pattern 'p' within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       The function uses multi-stage searching algorithm in case of no
       exact match found.  See the Search Internals document for
       detailed description.

       The 'ap' argument governs whether alternative patterns are to be
       used in case there is no direct hit for (p,f,m).  For example,
       whether to replace non-alphanumeric characters by spaces if it
       would give some hits.  See the Search Internals document for
       detailed description.  (ap=0 forbids the alternative pattern
       usage, ap=1 permits it.)

       The 'of' argument governs whether to print or not some
       information to the user in case of no match found.  (Usually it
       prints the information in case of HTML formats, otherwise it's
       silent.)

       The 'verbose' argument controls the level of debugging information
       to be printed (0=least, 9=most).

       All the parameters are assumed to have been previously washed.

       This function is suitable as a mid-level API.
    """

    _ = gettext_set_language(ln)

    hitset_empty = HitSet()
    # sanity check:
    if not p:
        hitset_full = HitSet(trailing_bits=1)
        hitset_full.discard(0)
        # no pattern, so return all universe
        return hitset_full
    # search stage 1: break up arguments into basic search units:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units = create_basic_search_units(req, p, f, m, of)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 1: basic search units are: %s" % cgi.escape(repr(basic_search_units)))
        print_warning(req, "Search stage 1: execution took %.2f seconds." % (t2 - t1))
    # search stage 2: do search for each search unit and verify hit presence:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    basic_search_units_hitsets = []
    #prepare hiddenfield-related..
    myhiddens = CFG_BIBFORMAT_HIDDEN_TAGS
    can_see_hidden = False
    if req:
        user_info = collect_user_info(req)
        can_see_hidden = (acc_authorize_action(user_info, 'runbibedit')[0] == 0)
    if can_see_hidden:
        myhiddens = []

    if CFG_INSPIRE_SITE and of.startswith('h'):
        # fulltext/caption search warnings for INSPIRE:
        fields_to_be_searched = [f for o, p, f, m in basic_search_units]
        if 'fulltext' in fields_to_be_searched:
            print_warning(req, _("Warning: full-text search is only available for a subset of papers mostly from 2006-2011."))
        elif 'caption' in fields_to_be_searched:
            print_warning(req, _("Warning: figure caption search is only available for a subset of papers mostly from 2008-2011."))

    for idx_unit in xrange(len(basic_search_units)):
        bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
        try:
            basic_search_unit_hitset = search_unit(bsu_p, bsu_f, bsu_m, wl)
        except InvenioWebSearchWildcardLimitError, excp:
            basic_search_unit_hitset = excp.res
            if of.startswith("h"):
                print_warning(req, "Search term too generic, displaying only partial results...")
        # FIXME: print warning if we use native full-text indexing
        if bsu_f == 'fulltext' and bsu_m != 'w' and of.startswith('h') and not CFG_SOLR_URL:
            print_warning(req, _("No phrase index available for fulltext yet, looking for word combination..."))
        #check that the user is allowed to search with this tag
        #if he/she tries it
        if bsu_f and len(bsu_f) > 1 and bsu_f[0].isdigit() and bsu_f[1].isdigit():
            for htag in myhiddens:
                ltag = len(htag)
                samelenfield = bsu_f[0:ltag]
                if samelenfield == htag: #user searches by a hidden tag
                    #we won't show you anything..
                    basic_search_unit_hitset = HitSet()
                    if verbose >= 9 and of.startswith("h"):
                        print_warning(req, "Pattern %s hitlist omitted since it queries in a hidden tag %s" %
                                      (repr(bsu_p), repr(myhiddens)))
                    display_nearest_terms_box = False #..and stop spying, too.
        if verbose >= 9 and of.startswith("h"):
            print_warning(req, "Search stage 2: pattern %s gave hitlist %s" % (cgi.escape(bsu_p), basic_search_unit_hitset))
        if len(basic_search_unit_hitset) > 0 or \
           ap==0 or \
           bsu_o=="|" or \
           ((idx_unit+1)<len(basic_search_units) and basic_search_units[idx_unit+1][0]=="|"):
            # stage 2-1: this basic search unit is retained, since
            # either the hitset is non-empty, or the approximate
            # pattern treatment is switched off, or the search unit
            # was joined by an OR operator to preceding/following
            # units so we do not require that it exists
            basic_search_units_hitsets.append(basic_search_unit_hitset)
        else:
            # stage 2-2: no hits found for this search unit, try to replace non-alphanumeric chars inside pattern:
            if re.search(r'[^a-zA-Z0-9\s\:]', bsu_p) and bsu_f != 'refersto' and bsu_f != 'citedby':
                if bsu_p.startswith('"') and bsu_p.endswith('"'): # is it ACC query?
                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', "*", bsu_p)
                else: # it is WRD query
                    bsu_pn = re.sub(r'[^a-zA-Z0-9\s\:]+', " ", bsu_p)
                if verbose and of.startswith('h') and req:
                    print_warning(req, "Trying (%s,%s,%s)" % (cgi.escape(bsu_pn), cgi.escape(bsu_f), cgi.escape(bsu_m)))
                basic_search_unit_hitset = search_pattern(req=None, p=bsu_pn, f=bsu_f, m=bsu_m, of="id", ln=ln, wl=wl)
                if len(basic_search_unit_hitset) > 0:
                    # we retain the new unit instead
                    if of.startswith('h'):
                        print_warning(req, _("No exact match found for %(x_query1)s, using %(x_query2)s instead...") % \
                                      {'x_query1': "<em>" + cgi.escape(bsu_p) + "</em>",
                                       'x_query2': "<em>" + cgi.escape(bsu_pn) + "</em>"})
                    basic_search_units[idx_unit][1] = bsu_pn
                    basic_search_units_hitsets.append(basic_search_unit_hitset)
                else:
                    # stage 2-3: no hits found either, propose nearest indexed terms:
                    if of.startswith('h') and display_nearest_terms_box:
                        if req:
                            if bsu_f == "recid":
                                print_warning(req, _("Requested record does not seem to exist."))
                            else:
                                print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln))
                    return hitset_empty
            else:
                # stage 2-3: no hits found either, propose nearest indexed terms:
                if of.startswith('h') and display_nearest_terms_box:
                    if req:
                        if bsu_f == "recid":
                            print_warning(req, _("Requested record does not seem to exist."))
                        else:
                            print_warning(req, create_nearest_terms_box(req.argd, bsu_p, bsu_f, bsu_m, ln=ln))
                return hitset_empty
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        for idx_unit in range(0, len(basic_search_units)):
            print_warning(req, "Search stage 2: basic search unit %s gave %d hits." %
                          (basic_search_units[idx_unit][1:], len(basic_search_units_hitsets[idx_unit])))
        print_warning(req, "Search stage 2: execution took %.2f seconds." % (t2 - t1))
    # search stage 3: apply boolean query for each search unit:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    # let the initial set be the complete universe:
    hitset_in_any_collection = HitSet(trailing_bits=1)
    hitset_in_any_collection.discard(0)
    for idx_unit in xrange(len(basic_search_units)):
        this_unit_operation = basic_search_units[idx_unit][0]
        this_unit_hitset = basic_search_units_hitsets[idx_unit]
        if this_unit_operation == '+':
            hitset_in_any_collection.intersection_update(this_unit_hitset)
        elif this_unit_operation == '-':
            hitset_in_any_collection.difference_update(this_unit_hitset)
        elif this_unit_operation == '|':
            hitset_in_any_collection.union_update(this_unit_hitset)
        else:
            if of.startswith("h"):
                print_warning(req, "Invalid set operation %s." % cgi.escape(this_unit_operation), "Error")
    if len(hitset_in_any_collection) == 0:
        # no hits found, propose alternative boolean query:
        if of.startswith('h') and display_nearest_terms_box:
            nearestterms = []
            for idx_unit in range(0, len(basic_search_units)):
                bsu_o, bsu_p, bsu_f, bsu_m = basic_search_units[idx_unit]
                if bsu_p.startswith("%") and bsu_p.endswith("%"):
                    bsu_p = "'" + bsu_p[1:-1] + "'"
                bsu_nbhits = len(basic_search_units_hitsets[idx_unit])

                # create a similar query, but with the basic search unit only
                argd = {}
                argd.update(req.argd)

                argd['p'] = bsu_p
                argd['f'] = bsu_f

                nearestterms.append((bsu_p, bsu_nbhits, argd))

            text = websearch_templates.tmpl_search_no_boolean_hits(
                     ln=ln, nearestterms=nearestterms)
            print_warning(req, text)
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 3: boolean query gave %d hits." % len(hitset_in_any_collection))
        print_warning(req, "Search stage 3: execution took %.2f seconds." % (t2 - t1))
    return hitset_in_any_collection
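# NOTE (editor): stage-3 semantics in a nutshell, with a made-up query.
# For p='author:ellis -title:muon' the basic search units would look like
# [['+', 'ellis', 'author', 'w'], ['-', 'muon', 'title', 'w']]: the full
# universe set is first intersected with the 'ellis' hitset ('+') and the
# 'muon' hitset is then subtracted from the result ('-').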
def search_pattern_parenthesised(req=None, p=None, f=None, m=None, ap=0, of="id", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True, wl=0):
    """Search for complex pattern 'p' containing parenthesis within field 'f' according to
       matching type 'm'.  Return hitset of recIDs.

       For more details on the parameters see 'search_pattern'
    """
    _ = gettext_set_language(ln)
    spires_syntax_converter = SpiresToInvenioSyntaxConverter()
    spires_syntax_query = False

    # if the pattern uses SPIRES search syntax, convert it to Invenio syntax
    if spires_syntax_converter.is_applicable(p):
        spires_syntax_query = True
        p = spires_syntax_converter.convert_query(p)

    # sanity check: do not call parenthesised parser for search terms
    # like U(1):
    if not re_pattern_parens.search(p):
        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)

    # Try searching with parentheses
    try:
        parser = SearchQueryParenthesisedParser()

        # get a hitset with all recids
        result_hitset = HitSet(trailing_bits=1)

        # parse the query. The result is list of [op1, expr1, op2, expr2, ..., opN, exprN]
        parsing_result = parser.parse_query(p)
        if verbose and of.startswith("h"):
            print_warning(req, "Search stage 1: search_pattern_parenthesised() returned %s." % repr(parsing_result))

        # go through every pattern,
        # calculate a hitset for it,
        # and combine the pattern's hitset with the result using the corresponding operator
        for index in xrange(0, len(parsing_result)-1, 2):
            current_operator = parsing_result[index]
            current_pattern = parsing_result[index+1]

            if CFG_INSPIRE_SITE and spires_syntax_query:
                # setting ap=0 to turn off approximate matching for 0 results.
                # Doesn't work well in combinations.
                # FIXME: The right fix involves collecting statuses for each
                # hitset, then showing a nearest terms box exactly once,
                # outside this loop.
                ap = 0
                display_nearest_terms_box = False

            # obtain a hitset for the current pattern
            current_hitset = search_pattern(req, current_pattern, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)

            # combine the current hitset with resulting hitset using the current operator
            if current_operator == '+':
                result_hitset = result_hitset & current_hitset
            elif current_operator == '-':
                result_hitset = result_hitset - current_hitset
            elif current_operator == '|':
                result_hitset = result_hitset | current_hitset
            else:
                assert False, "Unknown operator in search_pattern_parenthesised()"

        return result_hitset

    # If searching with parentheses fails, perform search ignoring parentheses
    except SyntaxError:

        print_warning(req, _("Search syntax misunderstood. Ignoring all parentheses in the query. If this doesn't help, please check your search and try again."))

        # remove the parentheses in the query. Current implementation removes all the parentheses,
        # but it could be improved to remove only those that are not inside quotes
        p = p.replace('(', ' ')
        p = p.replace(')', ' ')

        return search_pattern(req, p, f, m, ap, of, verbose, ln, display_nearest_terms_box=display_nearest_terms_box, wl=wl)
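# NOTE (editor): the parser result above is the documented flat list
# [op1, expr1, op2, expr2, ...]; a hypothetical illustration:
#
#   parser.parse_query('author:ellis - (title:muon | title:kaon)')
#   -> something like ['+', 'author:ellis', '-', 'title:muon | title:kaon'],
#      where each sub-expression is searched separately and combined with
#      '&', '-' or '|' as in the loop above.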
def search_unit(p, f=None, m=None, wl=0):
    """Search for basic search unit defined by pattern 'p' and field
       'f' and matching type 'm'.  Return hitset of recIDs.

       All the parameters are assumed to have been previously washed.
       'p' is assumed to be already a ``basic search unit'' so that it
       is searched as such and is not broken up in any way.  Only
       wildcard and span queries are being detected inside 'p'.

       If CFG_WEBSEARCH_SYNONYM_KBRS is set and we are searching in
       one of the indexes that has a defined runtime synonym knowledge
       base, then look up there and automatically enrich search
       results with results for synonyms.

       In case the wildcard limit (wl) is greater than 0 and this limit
       is reached an InvenioWebSearchWildcardLimitError will be raised.
       In case you want to call this function with no limit for the
       wildcard queries, wl should be 0.

       This function is suitable as a low-level API.
    """

    ## create empty output results set:
    hitset = HitSet()

    if not p: # sanity checking
        return hitset

    ## if applicable, look up runtime synonyms:
    hitset_synonyms = HitSet()
    if CFG_WEBSEARCH_SYNONYM_KBRS.has_key(f):
        for p_synonym in get_synonym_terms(p,
                             CFG_WEBSEARCH_SYNONYM_KBRS[f][0],
                             CFG_WEBSEARCH_SYNONYM_KBRS[f][1]):
            if p_synonym != p:
                hitset_synonyms |= search_unit(p_synonym, f, m, wl)

    ## look up hits:
    if CFG_SOLR_URL and f == 'fulltext':
        # redirect to Solr/Lucene
        return search_unit_in_solr(p, f, m)
    if f == 'datecreated':
        hitset = search_unit_in_bibrec(p, p, 'c')
    elif f == 'datemodified':
        hitset = search_unit_in_bibrec(p, p, 'm')
    elif f == 'refersto':
        # we are doing citation-link search:
        hitset = search_unit_refersto(p)
    elif f == 'citedby':
        # we are doing citation-link search:
        hitset = search_unit_citedby(p)
    elif m == 'a' or m == 'r':
        # we are doing either phrase search or regexp search
        if f == 'fulltext':
            # FIXME: workaround for not having phrase index yet
            return search_pattern(None, p, f, 'w')
        index_id = get_index_id_from_field(f)
        if index_id != 0:
            hitset = search_unit_in_idxphrases(p, f, m, wl)
        else:
            hitset = search_unit_in_bibxxx(p, f, m, wl)
    elif p.startswith("cited:"):
        # we are doing search by the citation count
        hitset = search_unit_by_times_cited(p[6:])
    else:
        # we are doing bibwords search by default
        hitset = search_unit_in_bibwords(p, f, m, wl=wl)

    ## merge synonym results and return total:
    hitset |= hitset_synonyms
    return hitset
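# NOTE (editor): dispatch summary for search_unit(), with illustrative
# patterns: search_unit('ellis', 'author', 'w') goes to the word index,
# search_unit('"ellis, j"', 'author', 'a') to the phrase index (assuming
# the field has one), and search_unit('cited:10->23') to the times-cited
# lookup defined further below.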
def search_unit_in_bibwords(word, f, m=None, decompress=zlib.decompress, wl=0):
    """Searches for 'word' inside bibwordsX table for field 'f' and
       returns hitset of recIDs."""
    set = HitSet() # will hold output result set
    set_used = 0 # not-yet-used flag, to be able to circumvent set operations
    limit_reached = 0 # flag for knowing if the query limit has been reached
    # deduce into which bibwordsX table we will search:
    stemming_language = get_index_stemming_language(get_index_id_from_field("anyfield"))
    bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
            stemming_language = get_index_stemming_language(index_id)
        else:
            return HitSet() # word index f does not exist

    # wash 'word' argument and run query:
    word = string.replace(word, '*', '%') # we now use '*' as the truncation character
    words = string.split(word, "->", 1) # check for span query
    if len(words) == 2:
        word0 = re_word.sub('', words[0])
        word1 = re_word.sub('', words[1])
        if stemming_language:
            word0 = lower_index_term(word0)
            word1 = lower_index_term(word1)
            word0 = stem(word0, stemming_language)
            word1 = stem(word1, stemming_language)
        try:
            res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term BETWEEN %%s AND %%s" % bibwordsX,
                                     (wash_index_term(word0), wash_index_term(word1)), wildcard_limit=wl)
        except InvenioDbQueryWildcardLimitError, excp:
            res = excp.res
            limit_reached = 1 # set the limit reached flag to true
    else:
        if f == 'journal':
            pass # FIXME: quick hack for the journal index
        else:
            word = re_word.sub('', word)
        if stemming_language:
            word = lower_index_term(word)
            word = stem(word, stemming_language)
        if string.find(word, '%') >= 0: # do we have wildcard in the word?
            if f == 'journal':
                # FIXME: quick hack for the journal index
                # FIXME: we can run a sanity check here for all indexes
                res = ()
            else:
                try:
                    res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term LIKE %%s" % bibwordsX,
                                             (wash_index_term(word),), wildcard_limit=wl)
                except InvenioDbQueryWildcardLimitError, excp:
                    res = excp.res
                    limit_reached = 1 # set the limit reached flag to true
        else:
            res = run_sql("SELECT term,hitlist FROM %s WHERE term=%%s" % bibwordsX,
                          (wash_index_term(word),))
    # fill the result set:
    for word, hitlist in res:
        hitset_bibwrd = HitSet(hitlist)
        # add the results:
        if set_used:
            set.union_update(hitset_bibwrd)
        else:
            set = hitset_bibwrd
            set_used = 1
    #check to see if the query limit was reached
    if limit_reached:
        #raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(set)
    # okay, return result set:
    return set

def search_unit_in_idxphrases(p, f, type, wl=0):
    """Searches for phrase 'p' inside idxPHRASE*F table for field 'f' and
       returns hitset of recIDs found.  The search type is defined by
       'type' (e.g. equals to 'r' for a regexp search)."""
    set = HitSet() # will hold output result set
    set_used = 0 # not-yet-used flag, to be able to circumvent set operations
    limit_reached = 0 # flag for knowing if the query limit has been reached
    use_query_limit = False # flag for knowing if to limit the query results or not
    # deduce in which idxPHRASE table we will search:
    idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            idxphraseX = "idxPHRASE%02dF" % index_id
        else:
            return HitSet() # phrase index f does not exist
    # detect query type (exact phrase, partial phrase, regexp):
    if type == 'r':
        query_addons = "REGEXP %s"
        query_params = (p,)
        use_query_limit = True
    else:
        p = string.replace(p, '*', '%') # we now use '*' as the truncation character
        ps = string.split(p, "->", 1) # check for span query:
        if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
            query_addons = "BETWEEN %s AND %s"
            query_params = (ps[0], ps[1])
            use_query_limit = True
        else:
            if string.find(p, '%') > -1:
                query_addons = "LIKE %s"
                query_params = (p,)
                use_query_limit = True
            else:
                query_addons = "= %s"
                query_params = (p,)
    # special washing for fuzzy author index:
    if f in ('author', 'firstauthor', 'exactauthor'):
        query_params_washed = ()
        for query_param in query_params:
            query_params_washed += (wash_author_name(query_param),)
        query_params = query_params_washed
    # perform search:
    if use_query_limit:
        try:
            res = run_sql_with_limit("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons),
                                     query_params, wildcard_limit=wl)
        except InvenioDbQueryWildcardLimitError, excp:
            res = excp.res
            limit_reached = 1 # set the limit reached flag to true
    else:
        res = run_sql("SELECT term,hitlist FROM %s WHERE term %s" % (idxphraseX, query_addons),
                      query_params)
    # fill the result set:
    for word, hitlist in res:
        hitset_bibphrase = HitSet(hitlist)
        # add the results:
        if set_used:
            set.union_update(hitset_bibphrase)
        else:
            set = hitset_bibphrase
            set_used = 1
    #check to see if the query limit was reached
    if limit_reached:
        #raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(set)
    # okay, return result set:
    return set
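# NOTE (editor): the span and truncation forms handled above, with made-up
# terms: 'muon->nucleon' turns into "term BETWEEN 'muon' AND 'nucleon'",
# 'muo*' into "term LIKE 'muo%'" (subject to the wildcard limit wl), and a
# plain 'muon' into an exact "term = 'muon'" lookup.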
def search_unit_in_bibxxx(p, f, type, wl=0):
    """Searches for pattern 'p' inside bibxxx tables for field 'f' and
       returns hitset of recIDs found.  The search type is defined by
       'type' (e.g. equals to 'r' for a regexp search)."""

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return search_unit_in_bibwords(p, f, wl=wl)

    p_orig = p # saving for eventual future 'no match' reporting
    limit_reached = 0 # flag for knowing if the query limit has been reached
    use_query_limit = False # flag for knowing if to limit the query results or not
    query_addons = "" # will hold additional SQL code for the query
    query_params = () # will hold parameters for the query (their number may vary depending on TYPE argument)
    # wash arguments:
    f = string.replace(f, '*', '%') # replace truncation char '*' in field definition
    if type == 'r':
        query_addons = "REGEXP %s"
        query_params = (p,)
        use_query_limit = True
    else:
        p = string.replace(p, '*', '%') # we now use '*' as the truncation character
        ps = string.split(p, "->", 1) # check for span query:
        if len(ps) == 2 and not (ps[0].endswith(' ') or ps[1].startswith(' ')):
            query_addons = "BETWEEN %s AND %s"
            query_params = (ps[0], ps[1])
            use_query_limit = True
        else:
            if string.find(p, '%') > -1:
                query_addons = "LIKE %s"
                query_params = (p,)
                use_query_limit = True
            else:
                query_addons = "= %s"
                query_params = (p,)
    # construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
        if not tl:
            # f index does not exist, nevermind
            pass
    # okay, start search:
    l = [] # will hold list of recID that matched
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        # construct and run query:
        if t == "001":
            if query_addons.find('BETWEEN') > -1 or query_addons.find('=') > -1:
                # verify that the params are integers (to avoid returning
                # record 123 when searching for 123foo):
                try:
                    query_params = tuple(int(param) for param in query_params)
                except ValueError:
                    return HitSet()
            if use_query_limit:
                try:
                    res = run_sql_with_limit("SELECT id FROM bibrec WHERE id %s" % query_addons,
                                             query_params, wildcard_limit=wl)
                except InvenioDbQueryWildcardLimitError, excp:
                    res = excp.res
                    limit_reached = 1 # set the limit reached flag to true
            else:
                res = run_sql("SELECT id FROM bibrec WHERE id %s" % query_addons,
                              query_params)
        else:
            query = "SELECT bibx.id_bibrec FROM %s AS bx LEFT JOIN %s AS bibx ON bx.id=bibx.id_bibxxx WHERE bx.value %s" % \
                    (bx, bibx, query_addons)
            # keep the tag parameter in a separate tuple, so that
            # query_params does not grow across loop iterations:
            if len(t) != 6 or t[-1:]=='%':
                # wildcard query, or only the beginning of field 't'
                # is defined, so add wildcard character:
                query += " AND bx.tag LIKE %s"
                query_params_and_tag = query_params + (t + '%',)
            else:
                # exact query for 't':
                query += " AND bx.tag=%s"
                query_params_and_tag = query_params + (t,)
            if use_query_limit:
                try:
                    res = run_sql_with_limit(query, query_params_and_tag, wildcard_limit=wl)
                except InvenioDbQueryWildcardLimitError, excp:
                    res = excp.res
                    limit_reached = 1 # set the limit reached flag to true
            else:
                res = run_sql(query, query_params_and_tag)
        # fill the result set:
        for id_bibrec in res:
            if id_bibrec[0]:
                l.append(id_bibrec[0])
    # check no of hits found:
    nb_hits = len(l)
    # okay, return result set:
    set = HitSet(l)
    #check to see if the query limit was reached
    if limit_reached:
        #raise an exception, so we can print a nice message to the user
        raise InvenioWebSearchWildcardLimitError(set)
    return set
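# NOTE (editor): tag deduction sketch; the actual mapping is site-configured.
# For f='author', get_field_tags() typically yields ['100__%', '700__%'], so
# the pattern is matched against bib10x/bib70x values whose tag is LIKE
# '100__%' resp. '700__%'.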
def search_unit_in_solr(p, f=None, m=None):
    """
    Query the Solr full-text index and return an intbitset corresponding
    to the result.  Parameters (p,f,m) are usual search unit ones.
    """
    if m and (m == 'a' or m == 'r'): # phrase/regexp query
        if p.startswith('%') and p.endswith('%'):
            p = p[1:-1] # fix for partial phrase
        p = '"' + p + '"'
    return solr_get_bitset(p, CFG_SOLR_URL)

def search_unit_in_bibrec(datetext1, datetext2, type='c'):
    """
    Return hitset of recIDs found that were either created or modified
    (according to 'type' arg being 'c' or 'm') from datetext1 until
    datetext2, inclusive.  Does not pay attention to pattern, collection,
    anything.  Useful to intersect later on with the 'real' query.
    """
    set = HitSet()
    if type.startswith("m"):
        type = "modification_date"
    else:
        type = "creation_date" # by default we are searching for creation dates
    if datetext1 == datetext2:
        res = run_sql("SELECT id FROM bibrec WHERE %s LIKE %%s" % (type,),
                      (datetext1 + '%',))
    else:
        res = run_sql("SELECT id FROM bibrec WHERE %s>=%%s AND %s<=%%s" % (type, type),
                      (datetext1, datetext2))
    for row in res:
        set += row[0]
    return set

def search_unit_by_times_cited(p):
    """
    Return hitset of recIDs found that are cited P times.
    Usually P looks like '10->23'.
    """
    numstr = '"'+p+'"'
    #this is sort of stupid but since we may need to
    #get the records that do _not_ have cites, we have to
    #know the ids of all records, too
    #but this is needed only if p is 0, "0", or a span
    #starting or ending at zero, like "0->N" or "N->0":
    allrecs = []
    if p == 0 or p == "0" or \
       p.startswith("0->") or p.endswith("->0"):
        allrecs = HitSet(run_sql("SELECT id FROM bibrec"))
    return get_records_with_num_cites(numstr, allrecs)

def search_unit_refersto(query):
    """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records referred to by these records.
    """
    if query:
        ahitset = search_pattern(p=query)
        if ahitset:
            return get_refersto_hitset(ahitset)
        else:
            return HitSet([])
    else:
        return HitSet([])

def search_unit_citedby(query):
    """
    Search for records satisfying the query (e.g. author:ellis) and
    return list of records cited by these records.
    """
    if query:
        ahitset = search_pattern(p=query)
        if ahitset:
            return get_citedby_hitset(ahitset)
        else:
            return HitSet([])
    else:
        return HitSet([])
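# NOTE (editor): times-cited sketch: search_unit_by_times_cited('10->23')
# selects records cited between 10 and 23 times, while a span touching zero
# such as '0->5' must also enumerate all record IDs so that the records
# with no citations at all can be included.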
def intersect_results_with_collrecs(req, hitset_in_any_collection, colls, ap=0, of="hb", verbose=0, ln=CFG_SITE_LANG, display_nearest_terms_box=True):
    """Return dict of hitsets given by intersection of hitset with the collection universes."""
    _ = gettext_set_language(ln)

    # search stage 4: intersect with the collection universe:
    if verbose and of.startswith("h"):
        t1 = os.times()[4]
    results = {}
    results_nbhits = 0
    for coll in colls:
        results[coll] = hitset_in_any_collection & get_collection_reclist(coll)
        results_nbhits += len(results[coll])
    if results_nbhits == 0:
        # no hits found, try to search in Home:
        results_in_Home = hitset_in_any_collection & get_collection_reclist(CFG_SITE_NAME)
        if len(results_in_Home) > 0:
            # some hits found in Home, so propose this search:
            if of.startswith("h") and display_nearest_terms_box:
                url = websearch_templates.build_search_url(req.argd, cc=CFG_SITE_NAME, c=[])
                print_warning(req, _("No match found in collection %(x_collection)s. Other public collections gave %(x_url_open)s%(x_nb_hits)d hits%(x_url_close)s.") %\
                              {'x_collection': '<em>' + string.join([get_coll_i18nname(coll, ln, False) for coll in colls], ', ') + '</em>',
                               'x_url_open': '<a class="nearestterms" href="%s">' % (url),
                               'x_nb_hits': len(results_in_Home),
                               'x_url_close': '</a>'})
            results = {}
        else:
            # no hits found in Home, recommend different search terms:
            if of.startswith("h") and display_nearest_terms_box:
                print_warning(req, _("No public collection matched your query. "
                                     "If you were looking for a non-public document, please choose "
                                     "the desired restricted collection first."))
            results = {}
    if verbose and of.startswith("h"):
        t2 = os.times()[4]
        print_warning(req, "Search stage 4: intersecting with collection universe gave %d hits." % results_nbhits)
        print_warning(req, "Search stage 4: execution took %.2f seconds." % (t2 - t1))
    return results

def intersect_results_with_hitset(req, results, hitset, ap=0, aptext="", of="hb"):
    """Return intersection of search 'results' (a dict of hitsets
       with collection as key) with the 'hitset', i.e. apply
       'hitset' intersection to each collection within search
       'results'.

       If the intersection turns out to be empty and 'ap'
       (approximate pattern) is true, then print 'aptext' and return
       the original 'results' set unchanged.  If 'ap' is false, then
       return the empty results set.
    """
    if ap:
        results_ap = copy.deepcopy(results)
    else:
        results_ap = {} # will return empty dict in case of no hits found
    nb_total = 0
    for coll in results.keys():
        results[coll].intersection_update(hitset)
        nb_total += len(results[coll])
    if nb_total == 0:
        if of.startswith("h"):
            print_warning(req, aptext)
        results = results_ap
    return results
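# NOTE (editor): the 'results' structure used by the two helpers above is a
# plain {collection_name: hitset} dict, e.g. (hypothetical values)
# {'Articles': HitSet([1, 5]), 'Books': HitSet([7])}; stage 4 builds it and
# the hitset intersection then narrows each collection in place.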
""" # return nothing if not configured: if CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX == 0: return "" # return empty box if there is no initial: if re.match(r'[^ ,]+, [^ ]', author_name) is None: return "" # firstly find name comma initial: author_name_to_search = re.sub(r'^([^ ,]+, +[^ ,]).*$', '\\1', author_name) # secondly search for similar name forms: similar_author_names = {} for name in author_name_to_search, strip_accents(author_name_to_search): for tag in get_field_tags("author"): # deduce into which bibxxx table we will search: digit1, digit2 = int(tag[0]), int(tag[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(tag) != 6 or tag[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag LIKE %%s""" % bx, (name + "%", tag + "%")) else: res = run_sql("""SELECT bx.value FROM %s AS bx WHERE bx.value LIKE %%s AND bx.tag=%%s""" % bx, (name + "%", tag)) for row in res: similar_author_names[row[0]] = 1 # remove the original name and sort the list: try: del similar_author_names[author_name] except KeyError: pass # thirdly print the box: out = "" if similar_author_names: out_authors = similar_author_names.keys() out_authors.sort() tmp_authors = [] for out_author in out_authors: nbhits = get_nbhits_in_bibxxx(out_author, "author") if nbhits: tmp_authors.append((out_author, nbhits)) out += websearch_templates.tmpl_similar_author_names( authors=tmp_authors, ln=ln) return out def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True): """Return text box containing list of 'n' nearest terms above/below 'p' for the field 'f' for matching type 't' (words/phrases) in language 'ln'. Propose new searches according to `urlargs' with the new words. If `intro_text_p' is true, then display the introductory message, otherwise print only the nearest terms in the box content. """ # load the right message language _ = gettext_set_language(ln) out = "" nearest_terms = [] if not p: # sanity check p = "." 
def create_nearest_terms_box(urlargd, p, f, t='w', n=5, ln=CFG_SITE_LANG, intro_text_p=True):
    """Return text box containing list of 'n' nearest terms above/below 'p'
       for the field 'f' for matching type 't' (words/phrases) in
       language 'ln'.
       Propose new searches according to `urlargs' with the new words.
       If `intro_text_p' is true, then display the introductory message,
       otherwise print only the nearest terms in the box content.
    """
    # load the right message language
    _ = gettext_set_language(ln)

    out = ""
    nearest_terms = []
    if not p: # sanity check
        p = "."
    if p.startswith('%') and p.endswith('%'):
        p = p[1:-1] # fix for partial phrase
    index_id = get_index_id_from_field(f)

    if f == 'fulltext':
        if CFG_SOLR_URL:
            return _("No match found, please enter different search terms.")
        else:
            # FIXME: workaround for not having native phrase index yet
            t = 'w'

    # special indexes:
    if f == 'refersto':
        return _("There are no records referring to %s.") % cgi.escape(p)
    if f == 'citedby':
        return _("There are no records cited by %s.") % cgi.escape(p)

    # look for nearest terms:
    if t == 'w':
        nearest_terms = get_nearest_terms_in_bibwords(p, f, n, n)
        if not nearest_terms:
            return _("No word index is available for %s.") % \
                   ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')
    else:
        nearest_terms = []
        if index_id:
            nearest_terms = get_nearest_terms_in_idxphrase(p, index_id, n, n)
        if f == 'datecreated' or f == 'datemodified':
            nearest_terms = get_nearest_terms_in_bibrec(p, f, n, n)
        if not nearest_terms:
            nearest_terms = get_nearest_terms_in_bibxxx(p, f, n, n)
        if not nearest_terms:
            return _("No phrase index is available for %s.") % \
                   ('<em>' + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + '</em>')

    terminfo = []
    for term in nearest_terms:
        if t == 'w':
            hits = get_nbhits_in_bibwords(term, f)
        else:
            if index_id:
                hits = get_nbhits_in_idxphrases(term, f)
            elif f == 'datecreated' or f == 'datemodified':
                hits = get_nbhits_in_bibrec(term, f)
            else:
                hits = get_nbhits_in_bibxxx(term, f)

        argd = {}
        argd.update(urlargd)
        # check which fields contained the requested parameter, and replace it:
        for (px, fx) in ('p', 'f'), ('p1', 'f1'), ('p2', 'f2'), ('p3', 'f3'):
            if px in argd:
                argd_px = argd[px]
                if t == 'w':
                    # p was stripped of accents, so do the same:
                    argd_px = strip_accents(argd_px)
                if f == argd[fx] or f == "anyfield" or f == "":
                    if string.find(argd_px, p) > -1:
                        argd[px] = string.replace(argd_px, p, term)
                        break
                else:
                    if string.find(argd_px, f+':'+p) > -1:
                        argd[px] = string.replace(argd_px, f+':'+p, f+':'+term)
                        break
                    elif string.find(argd_px, f+':"'+p+'"') > -1:
                        argd[px] = string.replace(argd_px, f+':"'+p+'"', f+':"'+term+'"')
                        break
                    elif string.find(argd_px, f+':\''+p+'\'') > -1:
                        argd[px] = string.replace(argd_px, f+':\''+p+'\'', f+':\''+term+'\'')
                        break
        terminfo.append((term, hits, argd))

    intro = ""
    if intro_text_p: # add full leading introductory text
        if f:
            intro = _("Search term %(x_term)s inside index %(x_index)s did not match any record. Nearest terms in any collection are:") % \
                    {'x_term': "<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>",
                     'x_index': "<em>" + cgi.escape(get_field_i18nname(get_field_name(f) or f, ln, False)) + "</em>"}
        else:
            intro = _("Search term %s did not match any record. Nearest terms in any collection are:") % \
                    ("<em>" + cgi.escape(p.startswith("%") and p.endswith("%") and p[1:-1] or p) + "</em>")

    return websearch_templates.tmpl_nearest_term_box(p=p, ln=ln, f=f, terminfo=terminfo,
                                                     intro=intro)
def get_nearest_terms_in_bibwords(p, f, n_below, n_above):
    """Return list of +n -n nearest terms to word `p' in index for field `f'."""
    nearest_words = [] # will hold the (sorted) list of nearest words to return
    # deduce into which bibwordsX table we will search:
    bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield")
    if f:
        index_id = get_index_id_from_field(f)
        if index_id:
            bibwordsX = "idxWORD%02dF" % index_id
        else:
            return nearest_words
    # firstly try to get `n' closest words above `p':
    res = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % bibwordsX,
                  (p, n_above))
    for row in res:
        nearest_words.append(row[0])
    nearest_words.reverse()
    # secondly insert given word `p':
    nearest_words.append(p)
    # finally try to get `n' closest words below `p':
    res = run_sql("SELECT term FROM %s WHERE term>%%s ORDER BY term ASC LIMIT %%s" % bibwordsX,
                  (p, n_below))
    for row in res:
        nearest_words.append(row[0])
    return nearest_words

def get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field idxPHRASE table,
       regardless of collection.
       Return list of [phrase1, phrase2, ... , phrase_n]."""
    if CFG_INSPIRE_SITE and index_id in (3, 15): # FIXME: workaround due to new fuzzy index
        return [p,]
    idxphraseX = "idxPHRASE%02dF" % index_id
    res_above = run_sql("SELECT term FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above))
    res_above = map(lambda x: x[0], res_above)
    res_above.reverse()

    res_below = run_sql("SELECT term FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below))
    res_below = map(lambda x: x[0], res_below)

    return res_above + res_below

def get_nearest_terms_in_idxphrase_with_collection(p, index_id, n_below, n_above, collection):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field idxPHRASE table,
       considering the collection (HitSet).
       Return list of [(phrase1, hitset), (phrase2, hitset), ... , (phrase_n, hitset)]."""
    idxphraseX = "idxPHRASE%02dF" % index_id
    res_above = run_sql("SELECT term,hitlist FROM %s WHERE term<%%s ORDER BY term DESC LIMIT %%s" % idxphraseX, (p, n_above * 3))
    res_above = [(term, HitSet(hitlist) & collection) for term, hitlist in res_above]
    res_above = [(term, len(hitlist)) for term, hitlist in res_above if hitlist]

    res_below = run_sql("SELECT term,hitlist FROM %s WHERE term>=%%s ORDER BY term ASC LIMIT %%s" % idxphraseX, (p, n_below * 3))
    res_below = [(term, HitSet(hitlist) & collection) for term, hitlist in res_below]
    res_below = [(term, len(hitlist)) for term, hitlist in res_below if hitlist]

    res_above.reverse()
    return res_above[-n_above:] + res_below[:n_below]
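# NOTE (editor): browse-window sketch with made-up terms: for p='muon' and
# n_above=n_below=2, get_nearest_terms_in_bibwords() returns alphabetical
# neighbours with 'muon' itself inserted in the middle, e.g.
# ['mud', 'multipole', 'muon', 'muonium', 'mupad'].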
def get_nearest_terms_in_bibxxx(p, f, n_below, n_above):
    """Browse (-n_above, +n_below) closest bibliographic phrases
       for the given pattern p in the given field f, regardless
       of collection.
       Return list of [phrase1, phrase2, ... , phrase_n]."""
    ## determine browse field:
    if not f and string.find(p, ":") > 0: # does 'p' contain ':'?
        f, p = string.split(p, ":", 1)

    # FIXME: quick hack for the journal index
    if f == 'journal':
        return get_nearest_terms_in_bibwords(p, f, n_below, n_above)

    ## We are going to take max(n_below, n_above) as the number of
    ## values to fetch from bibXXx.  This is needed to work around
    ## MySQL UTF-8 sorting troubles in 4.0.x.  Proper solution is to
    ## use MySQL 4.1.x or our own idxPHRASE in the future.

    index_id = get_index_id_from_field(f)
    if index_id:
        return get_nearest_terms_in_idxphrase(p, index_id, n_below, n_above)

    n_fetch = 2*max(n_below, n_above)
    ## construct 'tl' which defines the tag list (MARC tags) to search in:
    tl = []
    if str(f[0]).isdigit() and str(f[1]).isdigit():
        tl.append(f) # 'f' seems to be okay as it starts by two digits
    else:
        # deduce desired MARC tags on the basis of chosen 'f'
        tl = get_field_tags(f)
    ## start browsing to fetch list of hits:
    browsed_phrases = {} # will hold {phrase1: 1, phrase2: 1, ..., phraseN: 1} dict of browsed phrases (to make them unique)
    # always add self to the results set:
    browsed_phrases[p.startswith("%") and p.endswith("%") and p[1:-1] or p] = 1
    for t in tl:
        # deduce into which bibxxx table we will search:
        digit1, digit2 = int(t[0]), int(t[1])
        bx = "bib%d%dx" % (digit1, digit2)
        bibx = "bibrec_bib%d%dx" % (digit1, digit2)
        # firstly try to get `n' closest phrases above `p':
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value<%%s AND bx.tag LIKE %%s
                              ORDER BY bx.value DESC LIMIT %%s""" % bx,
                          (p, t + "%", n_fetch))
        else:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value<%%s AND bx.tag=%%s
                              ORDER BY bx.value DESC LIMIT %%s""" % bx,
                          (p, t, n_fetch))
        for row in res:
            browsed_phrases[row[0]] = 1
        # secondly try to get `n' closest phrases equal to or below `p':
        if len(t) != 6 or t[-1:]=='%':
            # only the beginning of field 't' is defined, so add wildcard character:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value>=%%s AND bx.tag LIKE %%s
                              ORDER BY bx.value ASC LIMIT %%s""" % bx,
                          (p, t + "%", n_fetch))
        else:
            res = run_sql("""SELECT bx.value FROM %s AS bx
                              WHERE bx.value>=%%s AND bx.tag=%%s
                              ORDER BY bx.value ASC LIMIT %%s""" % bx,
                          (p, t, n_fetch))
        for row in res:
            browsed_phrases[row[0]] = 1
    # select first n words only: (this is needed as we were searching
    # in many different tables and so aren't sure we have more than n
    # words right; this of course won't be needed when we shall have
    # one ACC table only for given field):
    phrases_out = browsed_phrases.keys()
    phrases_out.sort(lambda x, y: cmp(string.lower(strip_accents(x)),
                                      string.lower(strip_accents(y))))
    # find position of self:
    try:
        idx_p = phrases_out.index(p)
    except:
        idx_p = len(phrases_out)/2
    # return n_above and n_below:
    return phrases_out[max(0, idx_p-n_above):idx_p+n_below]
""" col = 'creation_date' if f == 'datemodified': col = 'modification_date' res_above = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s') FROM bibrec WHERE %s < %%s ORDER BY %s DESC LIMIT %%s""" % (col, col, col), (p, n_above)) res_below = run_sql("""SELECT DATE_FORMAT(%s,'%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s') FROM bibrec WHERE %s > %%s ORDER BY %s ASC LIMIT %%s""" % (col, col, col), (p, n_below)) out = set([]) for row in res_above: out.add(row[0]) for row in res_below: out.add(row[0]) out_list = list(out) out_list.sort() return list(out_list) def get_nbhits_in_bibrec(term, f): """Return number of hits in bibrec table. term is usually a date, and f is either 'datecreated' or 'datemodified'.""" col = 'creation_date' if f == 'datemodified': col = 'modification_date' res = run_sql("SELECT COUNT(*) FROM bibrec WHERE %s LIKE %%s" % (col,), (term + '%',)) return res[0][0] def get_nbhits_in_bibwords(word, f): """Return number of hits for word 'word' inside words index for field 'f'.""" out = 0 # deduce into which bibwordsX table we will search: bibwordsX = "idxWORD%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: bibwordsX = "idxWORD%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % bibwordsX, (word,)) for hitlist in res: out += len(HitSet(hitlist[0])) return out def get_nbhits_in_idxphrases(word, f): """Return number of hits for word 'word' inside phrase index for field 'f'.""" out = 0 # deduce into which bibwordsX table we will search: idxphraseX = "idxPHRASE%02dF" % get_index_id_from_field("anyfield") if f: index_id = get_index_id_from_field(f) if index_id: idxphraseX = "idxPHRASE%02dF" % index_id else: return 0 if word: res = run_sql("SELECT hitlist FROM %s WHERE term=%%s" % idxphraseX, (word,)) for hitlist in res: out += len(HitSet(hitlist[0])) return out def get_nbhits_in_bibxxx(p, f): """Return number of hits for word 'word' inside words index for field 'f'.""" ## determine browse field: if not f and string.find(p, ":") > 0: # does 'p' contain ':'? f, p = string.split(p, ":", 1) # FIXME: quick hack for the journal index if f == 'journal': return get_nbhits_in_bibwords(p, f) ## construct 'tl' which defines the tag list (MARC tags) to search in: tl = [] if str(f[0]).isdigit() and str(f[1]).isdigit(): tl.append(f) # 'f' seems to be okay as it starts by two digits else: # deduce desired MARC tags on the basis of chosen 'f' tl = get_field_tags(f) # start searching: recIDs = {} # will hold dict of {recID1: 1, recID2: 1, ..., } (unique recIDs, therefore) for t in tl: # deduce into which bibxxx table we will search: digit1, digit2 = int(t[0]), int(t[1]) bx = "bib%d%dx" % (digit1, digit2) bibx = "bibrec_bib%d%dx" % (digit1, digit2) if len(t) != 6 or t[-1:]=='%': # only the beginning of field 't' is defined, so add wildcard character: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag LIKE %%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t + "%")) else: res = run_sql("""SELECT bibx.id_bibrec FROM %s AS bibx, %s AS bx WHERE bx.value=%%s AND bx.tag=%%s AND bibx.id_bibxxx=bx.id""" % (bibx, bx), (p, t)) for row in res: recIDs[row[0]] = 1 return len(recIDs) def get_mysql_recid_from_aleph_sysno(sysno): """Returns DB's recID for ALEPH sysno passed in the argument (e.g. "002379334CER"). 
       Returns None in case of failure."""
    out = None
    res = run_sql("""SELECT bb.id_bibrec FROM bibrec_bib97x AS bb, bib97x AS b
                      WHERE b.value=%s AND b.tag='970__a' AND bb.id_bibxxx=b.id""",
                  (sysno,))
    if res:
        out = res[0][0]
    return out

def guess_primary_collection_of_a_record(recID):
    """Return primary collection name a record recid belongs to, by
       testing the 980 identifier.
       May lead to bad guesses when a collection is defined dynamically
       via dbquery.  In that case, return the value of CFG_SITE_NAME."""
    out = CFG_SITE_NAME
    dbcollids = get_fieldvalues(recID, "980__a")
    if dbcollids:
        for dbcollid in dbcollids:
            dbquery = "collection:" + dbcollid
            res = run_sql("SELECT name FROM collection WHERE dbquery=%s", (dbquery,))
            if res:
                out = res[0][0]
                break
    if CFG_CERN_SITE:
        # dirty hack for ATLAS collections at CERN:
        if out in ('ATLAS Communications', 'ATLAS Internal Notes'):
            for alternative_collection in ('ATLAS Communications Physics',
                                           'ATLAS Communications General',
                                           'ATLAS Internal Notes Physics',
                                           'ATLAS Internal Notes General',):
                if recID in get_collection_reclist(alternative_collection):
                    out = alternative_collection
                    break
    return out

_re_collection_url = re.compile('/collection/(.+)')
def guess_collection_of_a_record(recID, referer=None, recreate_cache_if_needed=True):
    """Return collection name a record recid belongs to, by first testing
       the referer URL if provided and otherwise returning the primary
       collection."""
    if referer:
        dummy, hostname, path, dummy, query, dummy = urlparse.urlparse(referer)
        g = _re_collection_url.match(path)
        if g:
            name = urllib.unquote_plus(g.group(1))
            if recID in get_collection_reclist(name):
                return name
        elif path.startswith('/search'):
            if recreate_cache_if_needed:
                collection_reclist_cache.recreate_cache_if_needed()
            query = cgi.parse_qs(query)
            for name in query.get('cc', []) + query.get('c', []):
                if recID in get_collection_reclist(name, recreate_cache_if_needed=False):
                    return name
    return guess_primary_collection_of_a_record(recID)

def is_record_in_any_collection(recID, recreate_cache_if_needed=True):
    """Return True if the record belongs to at least one collection.

    This is a good, although not perfect, indicator to guess if webcoll
    has already run after this record has been entered into the system.
    """
    if recreate_cache_if_needed:
        collection_reclist_cache.recreate_cache_if_needed()
    for name in collection_reclist_cache.cache.keys():
        if recID in get_collection_reclist(name, recreate_cache_if_needed=False):
            return True
    return False

def get_all_collections_of_a_record(recID, recreate_cache_if_needed=True):
    """Return all the collection names a record belongs to.

    Note this function is O(n_collections)."""
    ret = []
    if recreate_cache_if_needed:
        collection_reclist_cache.recreate_cache_if_needed()
    for name in collection_reclist_cache.cache.keys():
        if recID in get_collection_reclist(name, recreate_cache_if_needed=False):
            ret.append(name)
    return ret

def get_tag_name(tag_value, prolog="", epilog=""):
    """Return tag name from the known tag value, by looking up the 'tag' table.
       Return empty string in case of failure.
       Example: input='100__%', output='first author'."""
    out = ""
    res = run_sql("SELECT name FROM tag WHERE value=%s", (tag_value,))
    if res:
        out = prolog + res[0][0] + epilog
    return out

def get_fieldcodes():
    """Returns a list of field codes that may have been passed as 'search options' in URL.
Example: output=['subject','division'].""" out = [] res = run_sql("SELECT DISTINCT(code) FROM field") for row in res: out.append(row[0]) return out def get_field_name(code): """Return the corresponding field_name given the field code. e.g. reportnumber -> report number.""" res = run_sql("SELECT name FROM field WHERE code=%s", (code, )) if res: return res[0][0] else: return "" def get_field_tags(field): """Returns a list of MARC tags for the field code 'field'. Returns empty list in case of error. Example: field='author', output=['100__%','700__%'].""" out = [] query = """SELECT t.value FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (field, )) for val in res: out.append(val[0]) return out def get_fieldvalues(recIDs, tag, repetitive_values=True): """ Return list of field values for field TAG for the given record ID or list of record IDs. (RECIDS can be both an integer or a list of integers.) If REPETITIVE_VALUES is set to True, then return all values even if they are doubled. If set to False, then return unique values only. """ out = [] try: recIDs = int(recIDs) except: pass if isinstance(recIDs, (int, long)): recIDs =[recIDs,] if not isinstance(recIDs, (list, tuple)): return [] if len(recIDs) == 0: return [] if tag == "001___": # we have asked for tag 001 (=recID) that is not stored in bibXXx tables out = [str(recID) for recID in recIDs] else: # we are going to look inside bibXXx tables digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for return [] bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits queryparam = [] for recID in recIDs: queryparam.append(recID) if not repetitive_values: queryselect = "DISTINCT(bx.value)" else: queryselect = "bx.value" query = "SELECT %s FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec IN (%s) " \ " AND bx.id=bibx.id_bibxxx AND bx.tag LIKE %%s " \ " ORDER BY bibx.field_number, bx.tag ASC" % \ (queryselect, bx, bibx, ("%s,"*len(queryparam))[:-1]) res = run_sql(query, tuple(queryparam) + (tag,)) for row in res: out.append(row[0]) return out def get_fieldvalues_alephseq_like(recID, tags_in, can_see_hidden=False): """Return buffer of ALEPH sequential-like textual format with fields found in the list TAGS_IN for record RECID. If can_see_hidden is True, just print everything. Otherwise hide fields from CFG_BIBFORMAT_HIDDEN_TAGS. 
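    Example of the text-MARC-like output (hypothetical record data,
    shown only to illustrate the "%09d tag " and "$$<code><value>"
    layout produced below):

    >>> print get_fieldvalues_alephseq_like(18, ['700'])  # doctest: +SKIP
    000000018 700__ $$aEllis, J$$uCERN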
""" out = "" if type(tags_in) is not list: tags_in = [tags_in,] if len(tags_in) == 1 and len(tags_in[0]) == 6: ## case A: one concrete subfield asked, so print its value if found ## (use with care: can mislead if field has multiple occurrences) out += string.join(get_fieldvalues(recID, tags_in[0]),"\n") else: ## case B: print our "text MARC" format; works safely all the time # find out which tags to output: dict_of_tags_out = {} if not tags_in: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 else: for tag in tags_in: if len(tag) == 0: for i in range(0, 10): for j in range(0, 10): dict_of_tags_out["%d%d%%" % (i, j)] = 1 elif len(tag) == 1: for j in range(0, 10): dict_of_tags_out["%s%d%%" % (tag, j)] = 1 elif len(tag) < 5: dict_of_tags_out["%s%%" % tag] = 1 elif tag >= 6: dict_of_tags_out[tag[0:5]] = 1 tags_out = dict_of_tags_out.keys() tags_out.sort() # search all bibXXx tables as needed: for tag in tags_out: digits = tag[0:2] try: intdigits = int(digits) if intdigits < 0 or intdigits > 99: raise ValueError except ValueError: # invalid tag value asked for continue if tag.startswith("001") or tag.startswith("00%"): if out: out += "\n" out += "%09d %s %d" % (recID, "001__", recID) bx = "bib%sx" % digits bibx = "bibrec_bib%sx" % digits query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb "\ "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s"\ "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx) res = run_sql(query, (recID, str(tag)+'%')) # go through fields: field_number_old = -999 field_old = "" for row in res: field, value, field_number = row[0], row[1], row[2] ind1, ind2 = field[3], field[4] printme = True #check the stuff in hiddenfields if not can_see_hidden: for htag in CFG_BIBFORMAT_HIDDEN_TAGS: ltag = len(htag) samelenfield = field[0:ltag] if samelenfield == htag: printme = False if ind1 == "_": ind1 = "" if ind2 == "_": ind2 = "" # print field tag if printme: if field_number != field_number_old or field[:-1] != field_old[:-1]: if out: out += "\n" out += "%09d %s " % (recID, field[:5]) field_number_old = field_number field_old = field # print subfield value if field[0:2] == "00" and field[-1:] == "_": out += value else: out += "$$%s%s" % (field[-1:], value) return out def record_exists(recID): """Return 1 if record RECID exists. Return 0 if it doesn't exist. Return -1 if it exists but is marked as deleted. """ out = 0 res = run_sql("SELECT id FROM bibrec WHERE id=%s", (recID,), 1) if res: try: # if recid is '123foo', mysql will return id=123, and we don't want that recID = int(recID) except ValueError: return 0 # record exists; now check whether it isn't marked as deleted: dbcollids = get_fieldvalues(recID, "980__%") if ("DELETED" in dbcollids) or (CFG_CERN_SITE and "DUMMY" in dbcollids): out = -1 # exists, but marked as deleted else: out = 1 # exists fine return out def record_empty(recID): """ Is this record empty, e.g. has only 001, waiting for integration? @param recID: the record identifier. @type recID: int @return: 1 if the record is empty, 0 otherwise. @rtype: int """ record = get_record(recID) if record is None or len(record) < 2: return 1 else: return 0 def record_public_p(recID, recreate_cache_if_needed=True): """Return 1 if the record is public, i.e. if it can be found in the Home collection. Return 0 otherwise. 
""" return recID in get_collection_reclist(CFG_SITE_NAME, recreate_cache_if_needed=recreate_cache_if_needed) def get_creation_date(recID, fmt="%Y-%m-%d"): "Returns the creation date of the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(creation_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def get_modification_date(recID, fmt="%Y-%m-%d"): "Returns the date of last modification for the record 'recID'." out = "" res = run_sql("SELECT DATE_FORMAT(modification_date,%s) FROM bibrec WHERE id=%s", (fmt, recID), 1) if res: out = res[0][0] return out def print_warning(req, msg, msg_type='', prologue='<br />', epilogue='<br />'): "Prints warning message and flushes output." if req and msg: req.write(websearch_templates.tmpl_print_warning( msg = msg, type = msg_type, prologue = prologue, epilogue = epilogue, )) return def print_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page.""" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, collection=CFG_SITE_NAME, nb_found=-1, jrec=1, rg=10, aas=0, ln=CFG_SITE_LANG, p1="", p2="", p3="", f1="", f2="", f3="", m1="", m2="", m3="", op1="", op2="", sc=1, pl_in_url="", d1y=0, d1m=0, d1d=0, d2y=0, d2m=0, d2d=0, dt="", cpu_time=-1, middle_only=0): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. 
This is suitable for displaying navigation links at the bottom of the search results page.""" out = "" # sanity check: if jrec < 1: jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) return websearch_templates.tmpl_print_hosted_search_info( ln = ln, collection = collection, aas = aas, collection_name = get_coll_i18nname(collection, ln, False), collection_id = get_colID(collection), middle_only = middle_only, rg = rg, nb_found = nb_found, sf = sf, so = so, rm = rm, of = of, ot = ot, p = p, f = f, p1 = p1, p2 = p2, p3 = p3, f1 = f1, f2 = f2, f3 = f3, m1 = m1, m2 = m2, m3 = m3, op1 = op1, op2 = op2, pl_in_url = pl_in_url, d1y = d1y, d1m = d1m, d1d = d1d, d2y = d2y, d2m = d2m, d2d = d2d, dt = dt, jrec = jrec, sc = sc, sp = sp, all_fieldcodes = get_fieldcodes(), cpu_time = cpu_time, ) def print_results_overview(colls, results_final_nb_total, results_final_nb, cpu_time, ln=CFG_SITE_LANG, ec=[], hosted_colls_potential_results_p=False): """Prints results overview box with links to particular collections below.""" out = "" new_colls = [] for coll in colls: new_colls.append({ 'id': get_colID(coll), 'code': coll, 'name': get_coll_i18nname(coll, ln, False), }) return websearch_templates.tmpl_print_results_overview( ln = ln, results_final_nb_total = results_final_nb_total, results_final_nb = results_final_nb, cpu_time = cpu_time, colls = new_colls, ec = ec, hosted_colls_potential_results_p = hosted_colls_potential_results_p, ) def print_hosted_results(url_and_engine, ln=CFG_SITE_LANG, of=None, req=None, no_records_found=False, search_timed_out=False, limit=CFG_EXTERNAL_COLLECTION_MAXRESULTS): """Prints the full results of a hosted collection""" if of.startswith("h"): if no_records_found: return "<br />No results found." if search_timed_out: return "<br />The search engine did not respond in time." return websearch_templates.tmpl_print_hosted_results( url_and_engine=url_and_engine, ln=ln, of=of, req=req, limit=limit ) def sort_records(req, recIDs, sort_field='', sort_order='d', sort_pattern='', verbose=0, of='hb', ln=CFG_SITE_LANG): """Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'. If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by 'sort pattern', for example "sort by report number that starts by CERN-PS". Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly.""" _ = gettext_set_language(ln) ## check arguments: if not sort_field: return recIDs if len(recIDs) > CFG_WEBSEARCH_NB_RECORDS_TO_SORT: if of.startswith('h'): print_warning(req, _("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.") % CFG_WEBSEARCH_NB_RECORDS_TO_SORT, "Warning") return recIDs sort_fields = string.split(sort_field, ",") recIDs_dict = {} recIDs_out = [] ## first deduce sorting MARC tag out of the 'sort_field' argument: tags = [] for sort_field in sort_fields: if sort_field and str(sort_field[0:2]).isdigit(): # sort_field starts by two digits, so this is probably a MARC tag already tags.append(sort_field) else: # let us check the 'field' table query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f WHERE f.code=%s AND ft.id_field=f.id AND t.id=ft.id_tag ORDER BY ft.score DESC""" res = run_sql(query, (sort_field, )) if res: for row in res: tags.append(row[0]) else: if of.startswith('h'): print_warning(req, _("Sorry, %s does not seem to be a valid sort option. 
Choosing title sort instead.") % cgi.escape(sort_field), "Error") tags.append("245__a") if verbose >= 3: print_warning(req, "Sorting by tags %s." % cgi.escape(repr(tags))) if sort_pattern: print_warning(req, "Sorting preferentially by %s." % cgi.escape(sort_pattern)) ## check if we have sorting tag defined: if tags: # fetch the necessary field values: for recID in recIDs: val = "" # will hold value for recID according to which sort vals = [] # will hold all values found in sorting tag for recID for tag in tags: if CFG_CERN_SITE and tag == '773__c': # CERN hack: journal sorting # 773__c contains page numbers, e.g. 3-13, and we want to sort by 3, and numerically: vals.extend(["%050s" % x.split("-",1)[0] for x in get_fieldvalues(recID, tag)]) else: vals.extend(get_fieldvalues(recID, tag)) if sort_pattern: # try to pick that tag value that corresponds to sort pattern bingo = 0 for v in vals: if v.lower().startswith(sort_pattern.lower()): # bingo! bingo = 1 val = v break if not bingo: # sort_pattern not present, so add other vals after spaces val = sort_pattern + " " + string.join(vals) else: # no sort pattern defined, so join them all together val = string.join(vals) val = strip_accents(val.lower()) # sort values regardless of accents and case if recIDs_dict.has_key(val): recIDs_dict[val].append(recID) else: recIDs_dict[val] = [recID] # sort them: recIDs_dict_keys = recIDs_dict.keys() recIDs_dict_keys.sort() # now that keys are sorted, create output array: for k in recIDs_dict_keys: for s in recIDs_dict[k]: recIDs_out.append(s) # ascending or descending? if sort_order == 'a': recIDs_out.reverse() # okay, we are done return recIDs_out else: # good, no sort needed return recIDs def print_records(req, recIDs, jrec=1, rg=10, format='hb', ot='', ln=CFG_SITE_LANG, relevances=[], relevances_prologue="(", relevances_epilogue="%%)", decompress=zlib.decompress, search_pattern='', print_records_prologue_p=True, print_records_epilogue_p=True, verbose=0, tab='', sf='', so='d', sp='', rm=''): """ Prints list of records 'recIDs' formatted according to 'format' in groups of 'rg' starting from 'jrec'. Assumes that the input list 'recIDs' is sorted in reverse order, so it counts records from tail to head. A value of 'rg=-9999' means to print all records: to be used with care. Print also list of RELEVANCES for each record (if defined), in between RELEVANCE_PROLOGUE and RELEVANCE_EPILOGUE. Print prologue and/or epilogue specific to 'format' if 'print_records_prologue_p' and/or print_records_epilogue_p' are True. 'sf' is sort field and 'rm' is ranking method that are passed here only for proper linking purposes: e.g. when a certain ranking method or a certain sort field was selected, keep it selected in any dynamic search links that may be printed. 
""" # load the right message language _ = gettext_set_language(ln) # sanity checking: if req is None: return # get user_info (for formatting based on user) if isinstance(req, cStringIO.OutputType): user_info = {} else: user_info = collect_user_info(req) if len(recIDs): nb_found = len(recIDs) if rg == -9999: # print all records rg = nb_found else: rg = abs(rg) if jrec < 1: # sanity checks jrec = 1 if jrec > nb_found: jrec = max(nb_found-rg+1, 1) # will print records from irec_max to irec_min excluded: irec_max = nb_found - jrec irec_min = nb_found - jrec - rg if irec_min < 0: irec_min = -1 if irec_max >= nb_found: irec_max = nb_found - 1 #req.write("%s:%d-%d" % (recIDs, irec_min, irec_max)) if format.startswith('x'): # print header if needed if print_records_prologue_p: print_records_prologue(req, format) # print records recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] format_records(recIDs_to_print, format, ln=ln, search_pattern=search_pattern, record_separator="\n", user_info=user_info, req=req) # print footer if needed if print_records_epilogue_p: print_records_epilogue(req, format) elif format.startswith('t') or str(format[0:3]).isdigit(): # we are doing plain text output: for irec in range(irec_max, irec_min, -1): x = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) req.write(x) if x: req.write('\n') elif format == 'excel': recIDs_to_print = [recIDs[x] for x in range(irec_max, irec_min, -1)] create_excel(recIDs=recIDs_to_print, req=req, ln=ln, ot=ot) else: # we are doing HTML output: if format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): # portfolio and on-the-fly formats: for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)) elif format.startswith("hb"): # HTML brief format: display_add_to_basket = True if user_info: if user_info['email'] == 'guest': if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4: display_add_to_basket = False else: if not user_info['precached_usebaskets']: display_add_to_basket = False req.write(websearch_templates.tmpl_record_format_htmlbrief_header( ln = ln)) for irec in range(irec_max, irec_min, -1): row_number = jrec+irec_max-irec recid = recIDs[irec] if relevances and relevances[irec]: relevance = relevances[irec] else: relevance = '' record = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) req.write(websearch_templates.tmpl_record_format_htmlbrief_body( ln = ln, recid = recid, row_number = row_number, relevance = relevance, record = record, relevances_prologue = relevances_prologue, relevances_epilogue = relevances_epilogue, display_add_to_basket = display_add_to_basket )) req.write(websearch_templates.tmpl_record_format_htmlbrief_footer( ln = ln, display_add_to_basket = display_add_to_basket)) elif format.startswith("hd"): # HTML detailed format: for irec in range(irec_max, irec_min, -1): if record_exists(recIDs[irec]) == -1: print_warning(req, _("The record has been deleted.")) continue unordered_tabs = get_detailed_page_tabs(get_colID(guess_primary_collection_of_a_record(recIDs[irec])), recIDs[irec], ln=ln) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % 
ln recid = recIDs[irec] recid_to_display = recid # Record ID used to build the URL. if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: try: recid_to_display = get_fieldvalues(recid, CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG)[0] except IndexError: # No external sysno is available, keep using # internal recid. pass citedbynum = 0 #num of citations, to be shown in the cit tab references = -1 #num of references if CFG_BIBRANK_SHOW_CITATION_LINKS: citedbynum = get_cited_by_count(recid) if not CFG_CERN_SITE:#FIXME:should be replaced by something like CFG_SHOW_REFERENCES reftag = "" reftags = get_field_tags("reference") if reftags: reftag = reftags[0] tmprec = get_record(recid) if reftag and len(reftag) > 4: references = len(record_get_field_instances(tmprec, reftag[0:3], reftag[3], reftag[4])) tabs = [(unordered_tabs[tab_id]['label'], \ '%s/record/%s/%s%s' % (CFG_SITE_URL, recid_to_display, tab_id, link_ln), \ tab_id == tab, unordered_tabs[tab_id]['enabled']) \ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] # load content if tab == 'usage': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, citationnum=citedbynum, referencenum=references)) r = calculate_reading_similarity_list(recIDs[irec], "downloads") downloadsimilarity = None downloadhistory = None #if r: # downloadsimilarity = r if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS: downloadhistory = create_download_history_graph_and_box(recIDs[irec], ln) r = calculate_reading_similarity_list(recIDs[irec], "pageviews") viewsimilarity = None if r: viewsimilarity = r content = websearch_templates.tmpl_detailed_record_statistics(recIDs[irec], ln, downloadsimilarity=downloadsimilarity, downloadhistory=downloadhistory, viewsimilarity=viewsimilarity) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'citations': recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln, citationnum=citedbynum, referencenum=references)) req.write(websearch_templates.tmpl_detailed_record_citations_prologue(recid, ln)) # Citing citinglist = calculate_cited_by_list(recid) req.write(websearch_templates.tmpl_detailed_record_citations_citing_list(recid, ln, citinglist, sf=sf, so=so, sp=sp, rm=rm)) # Self-cited selfcited = get_self_cited_by(recid) req.write(websearch_templates.tmpl_detailed_record_citations_self_cited(recid, ln, selfcited=selfcited, citinglist=citinglist)) # Co-cited s = calculate_co_cited_with_list(recid) cociting = None if s: cociting = s req.write(websearch_templates.tmpl_detailed_record_citations_co_citing(recid, ln, cociting=cociting)) # Citation history, if needed citationhistory = None if citinglist: citationhistory = create_citation_history_graph_and_box(recid, ln) #debug if verbose > 3: print_warning(req, "Citation graph debug: " + \ str(len(citationhistory))) req.write(websearch_templates.tmpl_detailed_record_citations_citation_history(recid, ln, citationhistory)) req.write(websearch_templates.tmpl_detailed_record_citations_epilogue(recid, ln)) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) elif tab == 'references': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, citationnum=citedbynum, referencenum=references)) req.write(format_record(recIDs[irec], 'HDREF', ln=ln, user_info=user_info, verbose=verbose)) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) elif tab == 'keywords': from 
invenio.bibclassify_webinterface import \ record_get_keywords, get_sorting_options, \ generate_keywords, get_keywords_body from invenio.webinterface_handler import wash_urlargd form = req.form argd = wash_urlargd(form, { 'generate': (str, 'no'), 'sort': (str, 'occurrences'), 'type': (str, 'tagcloud'), 'numbering': (str, 'off'), }) recid = recIDs[irec] req.write(webstyle_templates.detailed_record_container_top(recid, tabs, ln, citationnum=citedbynum, referencenum=references)) if argd['generate'] == 'yes': # The user asked to generate the keywords. keywords = generate_keywords(req, recid) else: # Get the keywords contained in the MARC. keywords = record_get_keywords(recid, argd) if keywords: req.write(get_sorting_options(argd, keywords)) elif argd['sort'] == 'related' and not keywords: req.write('You may want to run BibIndex.') # Output the keywords or the generate button. get_keywords_body(keywords, req, recid, argd) req.write(webstyle_templates.detailed_record_container_bottom(recid, tabs, ln)) elif tab == 'plots': req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln)) content = websearch_templates.tmpl_record_plots( recID=recIDs[irec], ln=ln) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln)) else: # Metadata tab req.write(webstyle_templates.detailed_record_container_top(recIDs[irec], tabs, ln, show_short_rec_p=False, citationnum=citedbynum, referencenum=references)) creationdate = None modificationdate = None if record_exists(recIDs[irec]) == 1: creationdate = get_creation_date(recIDs[irec]) modificationdate = get_modification_date(recIDs[irec]) content = print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) content = websearch_templates.tmpl_detailed_record_metadata( recID = recIDs[irec], ln = ln, format = format, creationdate = creationdate, modificationdate = modificationdate, content = content) req.write(content) req.write(webstyle_templates.detailed_record_container_bottom(recIDs[irec], tabs, ln, creationdate=creationdate, modificationdate=modificationdate, show_short_rec_p=False)) if len(tabs) > 0: # Add the mini box at bottom of the page if CFG_WEBCOMMENT_ALLOW_REVIEWS: from invenio.webcomment import get_mini_reviews reviews = get_mini_reviews(recid = recIDs[irec], ln=ln) else: reviews = '' actions = format_record(recIDs[irec], 'HDACT', ln=ln, user_info=user_info, verbose=verbose) files = format_record(recIDs[irec], 'HDFILE', ln=ln, user_info=user_info, verbose=verbose) req.write(webstyle_templates.detailed_record_mini_panel(recIDs[irec], ln, format, files=files, reviews=reviews, actions=actions)) else: # Other formats for irec in range(irec_max, irec_min, -1): req.write(print_record(recIDs[irec], format, ot, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm)) else: print_warning(req, _("Use different search terms.")) def print_records_prologue(req, format): """ Print the appropriate prologue for list of records in the given format. 
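    For example (sketch), print_records_prologue(req, 'xm') writes the
    MARCXML collection opening produced by
    websearch_templates.tmpl_xml_marc_prologue() to 'req', and the
    companion print_records_epilogue(req, 'xm') below writes the
    matching closing markup.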
""" prologue = "" # no prologue needed for HTML or Text formats if format.startswith('xm'): prologue = websearch_templates.tmpl_xml_marc_prologue() elif format.startswith('xn'): prologue = websearch_templates.tmpl_xml_nlm_prologue() elif format.startswith('xw'): prologue = websearch_templates.tmpl_xml_refworks_prologue() elif format.startswith('xr'): prologue = websearch_templates.tmpl_xml_rss_prologue() elif format.startswith('xe'): prologue = websearch_templates.tmpl_xml_endnote_prologue() elif format.startswith('xo'): prologue = websearch_templates.tmpl_xml_mods_prologue() elif format.startswith('xp'): prologue = websearch_templates.tmpl_xml_podcast_prologue() elif format.startswith('x'): prologue = websearch_templates.tmpl_xml_default_prologue() req.write(prologue) def print_records_epilogue(req, format): """ Print the appropriate epilogue for list of records in the given format. """ epilogue = "" # no epilogue needed for HTML or Text formats if format.startswith('xm'): epilogue = websearch_templates.tmpl_xml_marc_epilogue() elif format.startswith('xn'): epilogue = websearch_templates.tmpl_xml_nlm_epilogue() elif format.startswith('xw'): epilogue = websearch_templates.tmpl_xml_refworks_epilogue() elif format.startswith('xr'): epilogue = websearch_templates.tmpl_xml_rss_epilogue() elif format.startswith('xe'): epilogue = websearch_templates.tmpl_xml_endnote_epilogue() elif format.startswith('xo'): epilogue = websearch_templates.tmpl_xml_mods_epilogue() elif format.startswith('xp'): epilogue = websearch_templates.tmpl_xml_podcast_epilogue() elif format.startswith('x'): epilogue = websearch_templates.tmpl_xml_default_epilogue() req.write(epilogue) def get_record(recid): """Directly the record object corresponding to the recid.""" if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE: value = run_sql("SELECT value FROM bibfmt WHERE id_bibrec=%s AND FORMAT='recstruct'", (recid, )) if value: try: return deserialize_via_marshal(value[0][0]) except: ### In case of corruption, let's rebuild it! pass return create_record(print_record(recid, 'xm'))[0] def print_record(recID, format='hb', ot='', ln=CFG_SITE_LANG, decompress=zlib.decompress, search_pattern=None, user_info=None, verbose=0, sf='', so='d', sp='', rm=''): """ Prints record 'recID' formatted according to 'format'. 'sf' is sort field and 'rm' is ranking method that are passed here only for proper linking purposes: e.g. when a certain ranking method or a certain sort field was selected, keep it selected in any dynamic search links that may be printed. """ if format == 'recstruct': return get_record(recID) _ = gettext_set_language(ln) display_claim_this_paper = False try: display_claim_this_paper = user_info["precached_viewclaimlink"] except (KeyError, TypeError): display_claim_this_paper = False #check from user information if the user has the right to see hidden fields/tags in the #records as well can_see_hidden = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) out = "" # sanity check: record_exist_p = record_exists(recID) if record_exist_p == 0: # doesn't exist return out # New Python BibFormat procedure for formatting # Old procedure follows further below # We must still check some special formats, but these # should disappear when BibFormat improves. 
if not (CFG_BIBFORMAT_USE_OLD_BIBFORMAT \ or format.lower().startswith('t') \ or format.lower().startswith('hm') \ or str(format[0:3]).isdigit() \ or ot): # Unspecified format is hd if format == '': format = 'hd' if record_exist_p == -1 and get_output_format_content_type(format) == 'text/html': # HTML output displays a default value for deleted records. # Other format have to deal with it. out += _("The record has been deleted.") else: out += call_bibformat(recID, format, ln, search_pattern=search_pattern, user_info=user_info, verbose=verbose) # at the end of HTML brief mode, print the "Detailed record" functionality: if format.lower().startswith('hb') and \ format.lower() != 'hb_p': out += websearch_templates.tmpl_print_record_brief_links(ln=ln, recID=recID, sf=sf, so=so, sp=sp, rm=rm, display_claim_link=display_claim_this_paper) return out # Old PHP BibFormat procedure for formatting # print record opening tags, if needed: if format == "marcxml" or format == "oai_dc": out += " <record>\n" out += " <header>\n" for oai_id in get_fieldvalues(recID, CFG_OAI_ID_FIELD): out += " <identifier>%s</identifier>\n" % oai_id out += " <datestamp>%s</datestamp>\n" % get_modification_date(recID) out += " </header>\n" out += " <metadata>\n" if format.startswith("xm") or format == "marcxml": # look for detailed format existence: query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s" res = run_sql(query, (recID, format), 1) if res and record_exist_p == 1: # record 'recID' is formatted in 'format', so print it out += "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format' -- they are not in "bibfmt" table; so fetch all the data from "bibXXx" tables: if format == "marcxml": out += """ <record xmlns="http://www.loc.gov/MARC21/slim">\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) elif format.startswith("xm"): out += """ <record>\n""" out += " <controlfield tag=\"001\">%d</controlfield>\n" % int(recID) if record_exist_p == -1: # deleted record, so display only OAI ID and 980: oai_ids = get_fieldvalues(recID, CFG_OAI_ID_FIELD) if oai_ids: out += "<datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\"><subfield code=\"%s\">%s</subfield></datafield>\n" % \ (CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4], CFG_OAI_ID_FIELD[4:5], CFG_OAI_ID_FIELD[5:6], oai_ids[0]) out += "<datafield tag=\"980\" ind1=\"\" ind2=\"\"><subfield code=\"c\">DELETED</subfield></datafield>\n" else: # controlfields query = "SELECT b.tag,b.value,bb.field_number FROM bib00x AS b, bibrec_bib00x AS bb "\ "WHERE bb.id_bibrec=%s AND b.id=bb.id_bibxxx AND b.tag LIKE '00%%' "\ "ORDER BY bb.field_number, b.tag ASC" res = run_sql(query, (recID, )) for row in res: field, value = row[0], row[1] value = encode_for_xml(value) out += """ <controlfield tag="%s" >%s</controlfield>\n""" % \ (encode_for_xml(field[0:3]), value) # datafields i = 1 # Do not process bib00x and bibrec_bib00x, as # they are controlfields. 
                    # So start at bib01x and bibrec_bib01x
                    # (and set i = 0 at the end of the first loop):
                    for digit1 in range(0, 10):
                        for digit2 in range(i, 10):
                            bx = "bib%d%dx" % (digit1, digit2)
                            bibx = "bibrec_bib%d%dx" % (digit1, digit2)
                            query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s AS bb " \
                                    "WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND b.tag LIKE %%s " \
                                    "ORDER BY bb.field_number, b.tag ASC" % (bx, bibx)
                            res = run_sql(query, (recID, str(digit1)+str(digit2)+'%'))
                            field_number_old = -999
                            field_old = ""
                            for row in res:
                                field, value, field_number = row[0], row[1], row[2]
                                ind1, ind2 = field[3], field[4]
                                if ind1 == "_" or ind1 == "":
                                    ind1 = " "
                                if ind2 == "_" or ind2 == "":
                                    ind2 = " "
                                # print field tag, unless hidden:
                                printme = True
                                if not can_see_hidden:
                                    for htag in CFG_BIBFORMAT_HIDDEN_TAGS:
                                        ltag = len(htag)
                                        samelenfield = field[0:ltag]
                                        if samelenfield == htag:
                                            printme = False
                                if printme:
                                    if field_number != field_number_old or field[:-1] != field_old[:-1]:
                                        if field_number_old != -999:
                                            out += """        </datafield>\n"""
                                        out += """        <datafield tag="%s" ind1="%s" ind2="%s">\n""" % \
                                               (encode_for_xml(field[0:3]), encode_for_xml(ind1), encode_for_xml(ind2))
                                        field_number_old = field_number
                                        field_old = field
                                    # print subfield value:
                                    value = encode_for_xml(value)
                                    out += """            <subfield code="%s">%s</subfield>\n""" % \
                                           (encode_for_xml(field[-1:]), value)
                            # all fields/subfields printed in this run, so close the tag:
                            if field_number_old != -999:
                                out += """        </datafield>\n"""
                        i = 0 # from the second run on, look at bibX0x/bibrec_bibX0x tables as well
                # we are at the end of printing the record:
                out += "    </record>\n"

    elif format == "xd" or format == "oai_dc":
        # XML Dublin Core format, possibly OAI -- select only some bibXXx fields:
        out += """    <dc xmlns="http://purl.org/dc/elements/1.1/"
                          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                          xsi:schemaLocation="http://purl.org/dc/elements/1.1/
                                              http://www.openarchives.org/OAI/1.1/dc.xsd">\n"""
        if record_exist_p == -1:
            out += ""
        else:
            for f in get_fieldvalues(recID, "041__a"):
                out += "        <language>%s</language>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "100__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "700__a"):
                out += "        <creator>%s</creator>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "245__a"):
                out += "        <title>%s</title>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "65017a"):
                out += "        <subject>%s</subject>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "8564_u"):
                if f.split('.')[-1] == 'png': # skip image links
                    continue
                out += "        <identifier>%s</identifier>\n" % encode_for_xml(f)
            for f in get_fieldvalues(recID, "520__a"):
                out += "        <description>%s</description>\n" % encode_for_xml(f)
            out += "        <date>%s</date>\n" % get_creation_date(recID)
        out += "    </dc>\n"

    elif len(format) == 6 and str(format[0:3]).isdigit():
        # user has asked to print some fields only
        if format == "001":
            out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, recID, format)
        else:
            vals = get_fieldvalues(recID, format)
            for val in vals:
                out += "<!--%s-begin-->%s<!--%s-end-->\n" % (format, val, format)

    elif format.startswith('t'):
        ## user directly asked for some tags to be displayed only
        if record_exist_p == -1:
            out += get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)
        else:
            out += get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)

    elif format == "hm":
        if record_exist_p == -1:
            out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden)) + "</pre>"
        else:
            out += "\n<pre>" + cgi.escape(get_fieldvalues_alephseq_like(recID, ot, can_see_hidden)) + "</pre>"

    elif format.startswith("h") and ot:
        ## user directly asked for some tags to be displayed only
        if record_exist_p == -1:
            out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ["001", CFG_OAI_ID_FIELD, "980"], can_see_hidden) + "</pre>"
        else:
            out += "\n<pre>" + get_fieldvalues_alephseq_like(recID, ot, can_see_hidden) + "</pre>"

    elif format == "hd":
        # HTML detailed format
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            # look for detailed format existence:
            query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
            res = run_sql(query, (recID, format), 1)
            if res:
                # record 'recID' is formatted in 'format', so print it
                out += "%s" % decompress(res[0][0])
            else:
                # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format:
                out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                      user_info=user_info, verbose=verbose)
                if out_record_in_format:
                    out += out_record_in_format
                else:
                    out += websearch_templates.tmpl_print_record_detailed(
                        ln = ln,
                        recID = recID,
                    )

    elif format.startswith("hb_") or format.startswith("hd_"):
        # underscore means that HTML brief/detailed formats should be called on-the-fly; suitable for testing formats
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose)

    elif format.startswith("hx"):
        # BibTeX format, called on the fly:
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            out += call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                  user_info=user_info, verbose=verbose)

    elif format.startswith("hs"):
        # for citation/download similarity navigation links:
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            out += '<a href="%s">' % websearch_templates.build_search_url(recid=recID, ln=ln)
            # firstly, title:
            titles = get_fieldvalues(recID, "245__a")
            if titles:
                for title in titles:
                    out += "<strong>%s</strong>" % title
            else:
                # usual title not found, try conference title:
                titles = get_fieldvalues(recID, "111__a")
                if titles:
                    for title in titles:
                        out += "<strong>%s</strong>" % title
                else:
                    # just print record ID:
                    out += "<strong>%s %d</strong>" % (get_field_i18nname("record ID", ln, False), recID)
            out += "</a>"
            # secondly, authors:
            authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a")
            if authors:
                out += " - %s" % authors[0]
                if len(authors) > 1:
                    out += " et al"
            # thirdly publication info:
            publinfos = get_fieldvalues(recID, "773__s")
            if not publinfos:
                publinfos = get_fieldvalues(recID, "909C4s")
                if not publinfos:
                    publinfos = get_fieldvalues(recID, "037__a")
                    if not publinfos:
                        publinfos = get_fieldvalues(recID, "088__a")
            if publinfos:
                out += " - %s" % publinfos[0]
            else:
                # fourthly publication year (if not publication info):
                years = get_fieldvalues(recID, "773__y")
                if not years:
                    years = get_fieldvalues(recID, "909C4y")
                    if not years:
                        years = get_fieldvalues(recID, "260__c")
                if years:
                    out += " (%s)" % years[0]

    else:
        # HTML brief format by default
        if record_exist_p == -1:
            out += _("The record has been deleted.")
        else:
            query = "SELECT value FROM bibfmt WHERE id_bibrec=%s AND format=%s"
            res = run_sql(query, (recID, format))
            if res:
                # record 'recID' is formatted in 'format', so print it
                out += "%s" % decompress(res[0][0])
            else:
                # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format:
                if CFG_WEBSEARCH_CALL_BIBFORMAT:
                    out_record_in_format = call_bibformat(recID, format, ln, search_pattern=search_pattern,
                                                          user_info=user_info, verbose=verbose)
                    if out_record_in_format:
                        out += out_record_in_format
                    else:
                        out += websearch_templates.tmpl_print_record_brief(
                            ln = ln,
                            recID = recID,
                        )
                else:
                    out += websearch_templates.tmpl_print_record_brief(
                        ln = ln,
                        recID = recID,
                    )
        # at the end of HTML brief mode, print the "Detailed record" functionality:
        if
format == 'hp' or format.startswith("hb_") or format.startswith("hd_"): pass # do nothing for portfolio and on-the-fly formats else: out += websearch_templates.tmpl_print_record_brief_links(ln=ln, recID=recID, sf=sf, so=so, sp=sp, rm=rm, display_claim_link=display_claim_this_paper) # print record closing tags, if needed: if format == "marcxml" or format == "oai_dc": out += " \n" out += " \n" return out def call_bibformat(recID, format="HD", ln=CFG_SITE_LANG, search_pattern=None, user_info=None, verbose=0): """ Calls BibFormat and returns formatted record. BibFormat will decide by itself if old or new BibFormat must be used. """ from invenio.bibformat_utils import get_pdf_snippets keywords = [] if search_pattern is not None: units = create_basic_search_units(None, str(search_pattern), None) keywords = [unit[1] for unit in units if (unit[0] != '-' and unit[2] in [None, 'fulltext'])] out = format_record(recID, of=format, ln=ln, search_pattern=keywords, user_info=user_info, verbose=verbose) if CFG_WEBSEARCH_FULLTEXT_SNIPPETS and user_info and \ 'fulltext' in user_info['uri']: # check snippets only if URL contains fulltext # FIXME: make it work for CLI too, via new function arg if keywords: snippets = get_pdf_snippets(recID, keywords) if snippets: out += snippets return out def log_query(hostname, query_args, uid=-1): """ Log query into the query and user_query tables. Return id_query or None in case of problems. """ id_query = None if uid >= 0: # log the query only if uid is reasonable res = run_sql("SELECT id FROM query WHERE urlargs=%s", (query_args,), 1) try: id_query = res[0][0] except: id_query = run_sql("INSERT INTO query (type, urlargs) VALUES ('r', %s)", (query_args,)) if id_query: run_sql("INSERT INTO user_query (id_user, id_query, hostname, date) VALUES (%s, %s, %s, %s)", (uid, id_query, hostname, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))) return id_query def log_query_info(action, p, f, colls, nb_records_found_total=-1): """Write some info to the log file for later analysis.""" try: log = open(CFG_LOGDIR + "/search.log", "a") log.write(time.strftime("%Y%m%d%H%M%S#", time.localtime())) log.write(action+"#") log.write(p+"#") log.write(f+"#") for coll in colls[:-1]: log.write("%s," % coll) log.write("%s#" % colls[-1]) log.write("%d" % nb_records_found_total) log.write("\n") log.close() except: pass return ### CALLABLES def perform_request_search(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, sf="", so="d", sp="", rm="", of="id", ot="", aas=0, p1="", f1="", m1="", op1="", p2="", f2="", m2="", op2="", p3="", f3="", m3="", sc=0, jrec=0, recid=-1, recidb=-1, sysno="", id=-1, idb=-1, sysnb="", action="", d1="", d1y=0, d1m=0, d1d=0, d2="", d2y=0, d2m=0, d2d=0, dt="", verbose=0, ap=0, ln=CFG_SITE_LANG, ec=None, tab="", wl=CFG_WEBSEARCH_WILDCARD_LIMIT): """Perform search or browse request, without checking for authentication. Return list of recIDs found, if of=id. Otherwise create web page. The arguments are as follows: req - mod_python Request class instance. cc - current collection (e.g. "ATLAS"). The collection the user started to search/browse from. c - collection list (e.g. ["Theses", "Books"]). The collections user may have selected/deselected when starting to search from 'cc'. p - pattern to search for (e.g. "ellis and muon or kaon"). f - field to search within (e.g. "author"). rg - records in groups of (e.g. "10"). Defines how many hits per collection in the search results page are displayed. sf - sort field (e.g. "title"). 
so - sort order ("a"=ascending, "d"=descending). sp - sort pattern (e.g. "CERN-") -- in case there are more values in a sort field, this argument tells which one to prefer rm - ranking method (e.g. "jif"). Defines whether results should be ranked by some known ranking method. of - output format (e.g. "hb"). Usually starting "h" means HTML output (and "hb" for HTML brief, "hd" for HTML detailed), "x" means XML output, "t" means plain text output, "id" means no output at all but to return list of recIDs found. (Suitable for high-level API.) ot - output only these MARC tags (e.g. "100,700,909C0b"). Useful if only some fields are to be shown in the output, e.g. for library to control some fields. aas - advanced search ("0" means no, "1" means yes). Whether search was called from within the advanced search interface. p1 - first pattern to search for in the advanced search interface. Much like 'p'. f1 - first field to search within in the advanced search interface. Much like 'f'. m1 - first matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op1 - first operator, to join the first and the second unit in the advanced search interface. ("a" add, "o" or, "n" not). p2 - second pattern to search for in the advanced search interface. Much like 'p'. f2 - second field to search within in the advanced search interface. Much like 'f'. m2 - second matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). op2 - second operator, to join the second and the third unit in the advanced search interface. ("a" add, "o" or, "n" not). p3 - third pattern to search for in the advanced search interface. Much like 'p'. f3 - third field to search within in the advanced search interface. Much like 'f'. m3 - third matching type in the advanced search interface. ("a" all of the words, "o" any of the words, "e" exact phrase, "p" partial phrase, "r" regular expression). sc - split by collection ("0" no, "1" yes). Governs whether we want to present the results in a single huge list, or splitted by collection. jrec - jump to record (e.g. "234"). Used for navigation inside the search results. recid - display record ID (e.g. "20000"). Do not search/browse but go straight away to the Detailed record page for the given recID. recidb - display record ID bis (e.g. "20010"). If greater than 'recid', then display records from recid to recidb. Useful for example for dumping records from the database for reformatting. sysno - display old system SYS number (e.g. ""). If you migrate to Invenio from another system, and store your old SYS call numbers, you can use them instead of recid if you wish so. id - the same as recid, in case recid is not set. For backwards compatibility. idb - the same as recid, in case recidb is not set. For backwards compatibility. sysnb - the same as sysno, in case sysno is not set. For backwards compatibility. action - action to do. "SEARCH" for searching, "Browse" for browsing. Default is to search. d1 - first datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-08-23 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd1' takes precedence over d1y, d1m, d1d if these are defined. d1y - first date's year (e.g. "1998"). Useful for search limits on creation/modification date. d1m - first date's month (e.g. "08"). 
Useful for search limits on creation/modification date. d1d - first date's day (e.g. "23"). Useful for search limits on creation/modification date. d2 - second datetime in full YYYY-mm-dd HH:MM:DD format (e.g. "1998-09-02 12:34:56"). Useful for search limits on creation/modification date (see 'dt' argument below). Note that 'd2' takes precedence over d2y, d2m, d2d if these are defined. d2y - second date's year (e.g. "1998"). Useful for search limits on creation/modification date. d2m - second date's month (e.g. "09"). Useful for search limits on creation/modification date. d2d - second date's day (e.g. "02"). Useful for search limits on creation/modification date. dt - first and second date's type (e.g. "c"). Specifies whether to search in creation dates ("c") or in modification dates ("m"). When dt is not set and d1* and d2* are set, the default is "c". verbose - verbose level (0=min, 9=max). Useful to print some internal information on the searching process in case something goes wrong. ap - alternative patterns (0=no, 1=yes). In case no exact match is found, the search engine can try alternative patterns e.g. to replace non-alphanumeric characters by a boolean query. ap defines if this is wanted. ln - language of the search interface (e.g. "en"). Useful for internationalization. ec - list of external search engines to search as well (e.g. "SPIRES HEP"). wl - wildcard limit (ex: 100) the wildcard queries will be limited at 100 results """ selected_external_collections_infos = None # wash output format: of = wash_output_format(of) # raise an exception when trying to print out html from the cli if of.startswith("h"): assert req # for every search engine request asking for an HTML output, we # first regenerate cache of collection and field I18N names if # needed; so that later we won't bother checking timestamps for # I18N names at all: if of.startswith("h"): collection_i18nname_cache.recreate_cache_if_needed() field_i18nname_cache.recreate_cache_if_needed() # wash all arguments requiring special care try: (cc, colls_to_display, colls_to_search, hosted_colls, wash_colls_debug) = wash_colls(cc, c, sc, verbose) # which colls to search and to display? 
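        # A minimal usage sketch of the API documented above
        # (hypothetical pattern and field):
        #
        #     recids = perform_request_search(p='ellis', f='author', of='id')
        #
        # With of='id' nothing is written to 'req'; the matching record
        # IDs are simply returned as a list.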
except InvenioWebSearchUnknownCollectionError, exc: colname = exc.colname if of.startswith("h"): page_start(req, of, cc, aas, ln, getUid(req), websearch_templates.tmpl_collection_not_found_page_title(colname, ln)) req.write(websearch_templates.tmpl_collection_not_found_page_body(colname, ln)) return page_end(req, of, ln) elif of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) else: return page_end(req, of, ln) p = wash_pattern(p) f = wash_field(f) p1 = wash_pattern(p1) f1 = wash_field(f1) p2 = wash_pattern(p2) f2 = wash_field(f2) p3 = wash_pattern(p3) f3 = wash_field(f3) datetext1, datetext2 = wash_dates(d1, d1y, d1m, d1d, d2, d2y, d2m, d2d) # wash ranking method: if not is_method_valid(None, rm): rm = "" _ = gettext_set_language(ln) # backwards compatibility: id, idb, sysnb -> recid, recidb, sysno (if applicable) if sysnb != "" and sysno == "": sysno = sysnb if id > 0 and recid == -1: recid = id if idb > 0 and recidb == -1: recidb = idb # TODO deduce passed search limiting criterias (if applicable) pl, pl_in_url = "", "" # no limits by default if action != "browse" and req and not isinstance(req, cStringIO.OutputType) \ and req.args: # we do not want to add options while browsing or while calling via command-line fieldargs = cgi.parse_qs(req.args) for fieldcode in get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: pl += "+%s:\"%s\" " % (fieldcode, val) pl_in_url += "&%s=%s" % (urllib.quote(fieldcode), urllib.quote(val)) # deduce recid from sysno argument (if applicable): if sysno: # ALEPH SYS number was passed, so deduce DB recID for the record: recid = get_mysql_recid_from_aleph_sysno(sysno) if recid is None: recid = 0 # use recid 0 to indicate that this sysno does not exist # deduce collection we are in (if applicable): if recid > 0: referer = None if req: referer = req.headers_in.get('Referer') cc = guess_collection_of_a_record(recid, referer) # deduce user id (if applicable): try: uid = getUid(req) except: uid = 0 ## 0 - start output if recid >= 0: # recid can be 0 if deduced from sysno and if such sysno does not exist ## 1 - detailed record display title, description, keywords = \ websearch_templates.tmpl_record_page_header_content(req, recid, ln) if req is not None and not req.header_only: page_start(req, of, cc, aas, ln, uid, title, description, keywords, recid, tab) # Default format is hb but we are in detailed -> change 'of' if of == "hb": of = "hd" if record_exists(recid): if recidb <= recid: # sanity check recidb = recid + 1 if of == "id": return [recidx for recidx in range(recid, recidb) if record_exists(recidx)] else: print_records(req, range(recid, recidb), -1, -9999, of, ot, ln, search_pattern=p, verbose=verbose, tab=tab, sf=sf, so=so, sp=sp, rm=rm) if req and of.startswith("h"): # register detailed record page view event client_ip_address = str(req.remote_ip) register_page_view_event(recid, uid, client_ip_address) else: # record does not exist if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) elif action == "browse": ## 2 - browse needed of = 'hb' page_start(req, of, cc, aas, ln, uid, _("Browse"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) 
req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) try: if aas == 1 or (p1 or p2 or p3): browse_pattern(req, colls_to_search, p1, f1, rg, ln) browse_pattern(req, colls_to_search, p2, f2, rg, ln) browse_pattern(req, colls_to_search, p3, f3, rg, ln) else: browse_pattern(req, colls_to_search, p, f, rg, ln) except: register_exception(req=req, alert_admin=True) if of.startswith("h"): req.write(create_error_box(req, verbose=verbose, ln=ln)) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) elif rm and p.startswith("recid:"): ## 3-ter - similarity search (or old-style citation search) needed if req and not req.header_only: page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) if record_exists(p[6:]) != 1: # record does not exist if of.startswith("h"): if req.header_only: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND else: print_warning(req, _("Requested record does not seem to exist.")) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find similar ones to it t1 = os.times()[4] results_similar_recIDs, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, results_similar_comments = \ rank_records(rm, 0, get_collection_reclist(cc), string.split(p), verbose) if results_similar_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, cc, len(results_similar_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_warning(req, results_similar_comments) print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) elif of=="id": return results_similar_recIDs elif of.startswith("x"): print_records(req, results_similar_recIDs, jrec, rg, of, ot, ln, results_similar_relevances, results_similar_relevances_prologue, results_similar_relevances_epilogue, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) else: # rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, results_similar_relevances_prologue) print_warning(req, results_similar_relevances_epilogue) print_warning(req, results_similar_comments) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) elif p.startswith("cocitedwith:"): #WAS EXPERIMENTAL ## 3-terter - cited by search needed page_start(req, of, cc, aas, ln, uid, _("Search Results"), p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, 
dt, jrec, ec, action)) recID = p[12:] if record_exists(recID) != 1: # record does not exist if of.startswith("h"): print_warning(req, _("Requested record does not seem to exist.")) if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: # record well exists, so find co-cited ones: t1 = os.times()[4] results_cocited_recIDs = map(lambda x: x[0], calculate_co_cited_with_list(int(recID))) if results_cocited_recIDs: t2 = os.times()[4] cpu_time = t2 - t1 if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, CFG_SITE_NAME, len(results_cocited_recIDs), jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) elif of=="id": return results_cocited_recIDs elif of.startswith("x"): print_records(req, results_cocited_recIDs, jrec, rg, of, ot, ln, search_pattern=p, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) else: # cited rank_records failed and returned some error message to display: if of.startswith("h"): print_warning(req, "nothing found") if of == "id": return [] elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) else: ## 3 - common search needed query_in_cache = False query_representation_in_cache = repr((p,f,colls_to_search, wl)) page_start(req, of, cc, aas, ln, uid, p=create_page_title_search_pattern_info(p, p1, p2, p3)) if of.startswith("h") and verbose and wash_colls_debug: print_warning(req, "wash_colls debugging info : %s" % wash_colls_debug) # search into the hosted collections only if the output format is html or xml if hosted_colls and (of.startswith("h") or of.startswith("x")) and not p.startswith("recid:"): # hosted_colls_results : the hosted collections' searches that did not timeout # hosted_colls_timeouts : the hosted collections' searches that timed out and will be searched later on again (hosted_colls_results, hosted_colls_timeouts) = calculate_hosted_collections_results(req, [p, p1, p2, p3], f, hosted_colls, verbose, ln, CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH) # successful searches if hosted_colls_results: hosted_colls_true_results = [] for result in hosted_colls_results: # if the number of results is None or 0 (or False) then just do nothing if result[1] == None or result[1] == False: # these are the searches the returned no or zero results if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned no results" % result[0][1].name) else: # these are the searches that actually returned results on time hosted_colls_true_results.append(result) if verbose: print_warning(req, "Hosted collections (perform_search_request): %s returned %s results in %s seconds" % (result[0][1].name, result[1], result[2])) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections results to be printed at this time") if hosted_colls_timeouts: if verbose: for timeout in hosted_colls_timeouts: print_warning(req, "Hosted collections (perform_search_request): %s timed out and will be searched again later" % timeout[0][1].name) # we need to know for later use if there were any hosted collections to be searched even if they weren't in the end elif hosted_colls and ((not (of.startswith("h") or of.startswith("x"))) or p.startswith("recid:")): 
(hosted_colls_results, hosted_colls_timeouts) = (None, None) else: if verbose: print_warning(req, "Hosted collections (perform_search_request): there were no hosted collections to be searched") ## let's define some useful boolean variables: # True means there are actual or potential hosted collections results to be printed hosted_colls_actual_or_potential_results_p = not (not hosted_colls or not ((hosted_colls_results and hosted_colls_true_results) or hosted_colls_timeouts)) # True means there are hosted collections timeouts to take care of later # (useful for more accurate printing of results later) hosted_colls_potential_results_p = not (not hosted_colls or not hosted_colls_timeouts) # True means we only have hosted collections to deal with only_hosted_colls_actual_or_potential_results_p = not colls_to_search and hosted_colls_actual_or_potential_results_p if of.startswith("h"): req.write(create_search_box(cc, colls_to_display, p, f, rg, sf, so, sp, rm, of, ot, aas, ln, p1, f1, m1, op1, p2, f2, m2, op2, p3, f3, m3, sc, pl, d1y, d1m, d1d, d2y, d2m, d2d, dt, jrec, ec, action)) t1 = os.times()[4] results_in_any_collection = HitSet() if aas == 1 or (p1 or p2 or p3): ## 3A - advanced search try: results_in_any_collection = search_pattern_parenthesised(req, p1, f1, m1, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p2: results_tmp = search_pattern_parenthesised(req, p2, f2, m2, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if op1 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op1 == "o": # or results_in_any_collection.union_update(results_tmp) elif op1 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." % cgi.escape(op1), "Error") if len(results_in_any_collection) == 0: if of.startswith("h"): perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) elif of.startswith("x"): # Print empty, but valid XML print_records_prologue(req, of) print_records_epilogue(req, of) return page_end(req, of, ln) if p3: results_tmp = search_pattern_parenthesised(req, p3, f3, m3, ap=ap, of=of, verbose=verbose, ln=ln, wl=wl) if op2 == "a": # add results_in_any_collection.intersection_update(results_tmp) elif op2 == "o": # or results_in_any_collection.union_update(results_tmp) elif op2 == "n": # not results_in_any_collection.difference_update(results_tmp) else: if of.startswith("h"): print_warning(req, "Invalid set operation %s." 
                                          % cgi.escape(op2), "Error")
            except:
                register_exception(req=req, alert_admin=True)
                if of.startswith("h"):
                    req.write(create_error_box(req, verbose=verbose, ln=ln))
                    perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                elif of.startswith("x"):
                    # Print empty, but valid XML
                    print_records_prologue(req, of)
                    print_records_epilogue(req, of)
                return page_end(req, of, ln)
        else:
            ## 3B - simple search
            if search_results_cache.cache.has_key(query_representation_in_cache):
                # query is in the cache already, so reuse the cached results:
                query_in_cache = True
                results_in_any_collection = search_results_cache.cache[query_representation_in_cache]
                if verbose and of.startswith("h"):
                    print_warning(req, "Search stage 0: query found in cache, reusing cached results.")
            else:
                try:
                    # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection"
                    # recommendations when there are results only in the hosted collections.  Also added the if clause to avoid
                    # searching in case we know we only have actual or potential hosted collections results
                    if not only_hosted_colls_actual_or_potential_results_p:
                        results_in_any_collection = search_pattern_parenthesised(req, p, f, ap=ap, of=of, verbose=verbose, ln=ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p, wl=wl)
                except:
                    register_exception(req=req, alert_admin=True)
                    if of.startswith("h"):
                        req.write(create_error_box(req, verbose=verbose, ln=ln))
                        perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                    return page_end(req, of, ln)
        if len(results_in_any_collection) == 0 and not hosted_colls_actual_or_potential_results_p:
            if of.startswith("h"):
                perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
            elif of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
            return page_end(req, of, ln)
        # store this search query results into search results cache if needed:
        if CFG_WEBSEARCH_SEARCH_CACHE_SIZE and not query_in_cache:
            if len(search_results_cache.cache) > CFG_WEBSEARCH_SEARCH_CACHE_SIZE:
                search_results_cache.clear()
            search_results_cache.cache[query_representation_in_cache] = results_in_any_collection
            if verbose and of.startswith("h"):
                print_warning(req, "Search stage 3: storing query results in cache.")
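# [Editor's note] Illustrative sketch, not part of this diff: the cache policy
# above is deliberately crude -- once the number of cached queries exceeds
# CFG_WEBSEARCH_SEARCH_CACHE_SIZE, the whole cache is flushed rather than
# evicting individual entries.  A standalone model of the same behaviour
# (names here are hypothetical):

def cache_store(cache, key, value, max_size):
    """Store VALUE under KEY, clearing CACHE first if it grew past MAX_SIZE."""
    if max_size and len(cache) > max_size:
        cache.clear() # drop everything; simpler (and cheaper) than LRU bookkeeping
    cache[key] = value

# cache_store({}, repr(('ellis', '', ['Theses'], 0)), hitset, 100) keeps at
# most ~100 cached queries, matching the "max. ~%d" wording on the cache page.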
        # search stage 4: intersection with collection universe:
        try:
            # added the display_nearest_terms_box parameter to avoid printing out the "Nearest terms in any collection"
            # recommendations when there are results only in the hosted collections.  Also added the if clause to avoid
            # searching in case we know since the last stage that we have no results in any collection
            if len(results_in_any_collection) != 0:
                results_final = intersect_results_with_collrecs(req, results_in_any_collection, colls_to_search, ap, of, verbose, ln, display_nearest_terms_box=not hosted_colls_actual_or_potential_results_p)
            else:
                results_final = {}
        except:
            register_exception(req=req, alert_admin=True)
            if of.startswith("h"):
                req.write(create_error_box(req, verbose=verbose, ln=ln))
                perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
            return page_end(req, of, ln)
        if results_final == {} and not hosted_colls_actual_or_potential_results_p:
            if of.startswith("h"):
                perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
            if of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
            return page_end(req, of, ln)
        # search stage 5: apply search option limits and restrictions:
        if datetext1 != "" and results_final != {}:
            if verbose and of.startswith("h"):
                print_warning(req, "Search stage 5: applying time etc limits, from %s until %s..." % (datetext1, datetext2))
            try:
                results_final = intersect_results_with_hitset(req, results_final, search_unit_in_bibrec(datetext1, datetext2, dt), ap, aptext=_("No match within your time limits, " "discarding this condition..."), of=of)
            except:
                register_exception(req=req, alert_admin=True)
                if of.startswith("h"):
                    req.write(create_error_box(req, verbose=verbose, ln=ln))
                    perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                return page_end(req, of, ln)
            if results_final == {} and not hosted_colls_actual_or_potential_results_p:
                if of.startswith("h"):
                    perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                #if of.startswith("x"):
                #    # Print empty, but valid XML
                #    print_records_prologue(req, of)
                #    print_records_epilogue(req, of)
                return page_end(req, of, ln)
        if pl and results_final != {}:
            pl = wash_pattern(pl)
            if verbose and of.startswith("h"):
                print_warning(req, "Search stage 5: applying search pattern limit %s..." % cgi.escape(pl))
            try:
                results_final = intersect_results_with_hitset(req, results_final, search_pattern_parenthesised(req, pl, ap=0, ln=ln, wl=wl), ap, aptext=_("No match within your search limits, " "discarding this condition..."), of=of)
            except:
                register_exception(req=req, alert_admin=True)
                if of.startswith("h"):
                    req.write(create_error_box(req, verbose=verbose, ln=ln))
                    perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                return page_end(req, of, ln)
            if results_final == {} and not hosted_colls_actual_or_potential_results_p:
                if of.startswith("h"):
                    perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos)
                if of.startswith("x"):
                    # Print empty, but valid XML
                    print_records_prologue(req, of)
                    print_records_epilogue(req, of)
                return page_end(req, of, ln)
        t2 = os.times()[4]
        cpu_time = t2 - t1
        ## search stage 6: display results:
        results_final_nb_total = 0
        results_final_nb = {} # will hold number of records found in each collection
                              # (in simple dict to display overview more easily)
        for coll in results_final.keys():
            results_final_nb[coll] = len(results_final[coll])
            #results_final_nb_total += results_final_nb[coll]
        # Now let us calculate results_final_nb_total more precisely,
        # in order to get the total number of "distinct" hits across
        # searched collections; this is useful because a record might
        # have been attributed to more than one primary collection; so
        # we have to avoid counting it multiple times.  The price to
        # pay for this accuracy of results_final_nb_total is somewhat
        # increased CPU time.
        if len(results_final.keys()) == 1:
            # only one collection; no need to union them:
            results_final_for_all_selected_colls = results_final.values()[0]
            results_final_nb_total = results_final_nb.values()[0]
        else:
            # okay, some work ahead to union hits across collections:
            results_final_for_all_selected_colls = HitSet()
            for coll in results_final.keys():
                results_final_for_all_selected_colls.union_update(results_final[coll])
            results_final_nb_total = len(results_final_for_all_selected_colls)
        #if hosted_colls and (of.startswith("h") or of.startswith("x")):
        if hosted_colls_actual_or_potential_results_p:
            if hosted_colls_results:
                for result in hosted_colls_true_results:
                    colls_to_search.append(result[0][1].name)
                    results_final_nb[result[0][1].name] = result[1]
                    results_final_nb_total += result[1]
                    cpu_time += result[2]
            if hosted_colls_timeouts:
                for timeout in hosted_colls_timeouts:
                    colls_to_search.append(timeout[1].name)
                    # use -963 as a special number to identify the collections that timed out
                    results_final_nb[timeout[1].name] = -963
        # we continue past this point only if there is a hosted collection that has timed out and might offer potential results
        if results_final_nb_total == 0 and not hosted_colls_potential_results_p:
            if of.startswith("h"):
                print_warning(req, "No match found, please enter different search terms.")
            elif of.startswith("x"):
                # Print empty, but valid XML
                print_records_prologue(req, of)
                print_records_epilogue(req, of)
        else:
            # yes, some hits found: good!
            # collection list may have changed due to not-exact-match-found policy so check it out:
            for coll in results_final.keys():
                if coll not in colls_to_search:
                    colls_to_search.append(coll)
            # print results overview:
            if of == "id":
                # we have been asked to return list of recIDs
                recIDs = list(results_final_for_all_selected_colls)
                if sf: # do we have to sort?
                    recIDs = sort_records(req, recIDs, sf, so, sp, verbose, of)
                elif rm: # do we have to rank?
                    results_final_for_all_colls_rank_records_output = rank_records(rm, 0, results_final_for_all_selected_colls, string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose)
                    if results_final_for_all_colls_rank_records_output[0]:
                        recIDs = results_final_for_all_colls_rank_records_output[0]
                return recIDs
            elif of.startswith("h"):
                if of not in ['hcs']:
                    # added the hosted_colls_potential_results_p parameter to help print out the overview more accurately
                    req.write(print_results_overview(colls_to_search, results_final_nb_total, results_final_nb, cpu_time, ln, ec, hosted_colls_potential_results_p=hosted_colls_potential_results_p))
                    selected_external_collections_infos = print_external_results_overview(req, cc, [p, p1, p2, p3], f, ec, verbose, ln)
            # print number of hits found for XML outputs:
            if of.startswith("x"):
                req.write("<!-- Search-Engine-Total-Number-of-Results: %s -->\n" % results_final_nb_total)
            # print records:
            if of in ['hcs']:
                # feed the current search to be summarized:
                from invenio.search_engine_summarizer import summarize_records
                search_p = p
                search_f = f
                if not p and (aas == 1 or p1 or p2 or p3):
                    op_d = {'n': ' and not ', 'a': ' and ', 'o': ' or ', '': ''}
                    triples = ziplist([f1, f2, f3], [p1, p2, p3], [op1, op2, ''])
                    triples_len = len(triples)
                    for i in range(triples_len):
                        fi, pi, oi = triples[i] # e.g.:
                        if i < triples_len-1 and not triples[i+1][1]: # if p2 empty
                            triples[i+1][0] = ''                     # f2 must be too
                            oi = ''                                  # and o1
                        if ' ' in pi:
                            pi = '"'+pi+'"'
                        if fi:
                            fi = fi + ':'
                        search_p += fi + pi + op_d[oi]
                    search_f = ''
                summarize_records(results_final_for_all_selected_colls, 'hcs', ln, search_p, search_f, req)
            else:
                if len(colls_to_search) > 1:
                    cpu_time = -1 # we do not want to have search time printed on each collection
                print_records_prologue(req, of)
                for coll in colls_to_search:
                    if results_final.has_key(coll) and len(results_final[coll]):
                        if of.startswith("h"):
                            req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time))
                        results_final_recIDs = list(results_final[coll])
                        results_final_relevances = []
                        results_final_relevances_prologue = ""
                        results_final_relevances_epilogue = ""
                        if sf: # do we have to sort?
                            results_final_recIDs = sort_records(req, results_final_recIDs, sf, so, sp, verbose, of)
                        elif rm: # do we have to rank?
results_final_recIDs_ranked, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, results_final_comments = \ rank_records(rm, 0, results_final[coll], string.split(p) + string.split(p1) + string.split(p2) + string.split(p3), verbose) if of.startswith("h"): print_warning(req, results_final_comments) if results_final_recIDs_ranked: results_final_recIDs = results_final_recIDs_ranked else: # rank_records failed and returned some error message to display: print_warning(req, results_final_relevances_prologue) print_warning(req, results_final_relevances_epilogue) print_records(req, results_final_recIDs, jrec, rg, of, ot, ln, results_final_relevances, results_final_relevances_prologue, results_final_relevances_epilogue, search_pattern=p, print_records_prologue_p=False, print_records_epilogue_p=False, verbose=verbose, sf=sf, so=so, sp=sp, rm=rm) if of.startswith("h"): req.write(print_search_info(p, f, sf, so, sp, rm, of, ot, coll, results_final_nb[coll], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) #if hosted_colls and (of.startswith("h") or of.startswith("x")): if hosted_colls_actual_or_potential_results_p: if hosted_colls_results: # TODO: add a verbose message here for result in hosted_colls_true_results: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, results_final_nb[result[0][1].name], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts: # TODO: add a verbose message here # TODO: check if verbose messages still work when dealing with (re)calculations of timeouts (hosted_colls_timeouts_results, hosted_colls_timeouts_timeouts) = do_calculate_hosted_collections_results(req, ln, None, verbose, None, hosted_colls_timeouts, CFG_HOSTED_COLLECTION_TIMEOUT_POST_SEARCH) if hosted_colls_timeouts_results: for result in hosted_colls_timeouts_results: if result[1] == None or result[1] == False: ## these are the searches the returned no or zero results ## also print a nearest terms box, in case this is the only ## collection being searched and it returns no results? 
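# [Editor's note] Illustrative sketch, not part of this diff: hosted external
# collections are searched in two phases.  A first pass runs with the short
# CFG_HOSTED_COLLECTION_TIMEOUT_ANTE_SEARCH budget; engines that miss it are
# retried below with the longer POST_SEARCH budget, and anything that still
# times out is reported with the sentinel hit-count -963.  A standalone model
# of the partitioning (the engine names and timings are made up):

def split_by_timeout(searches, budget):
    """Partition SEARCHES, i.e. (name, seconds_needed, nb_hits) triples,
    into (finished, still_pending) under a time BUDGET in seconds."""
    finished, still_pending = [], []
    for name, seconds_needed, nb_hits in searches:
        if seconds_needed <= budget:
            finished.append((name, nb_hits))
        else:
            still_pending.append((name, seconds_needed, nb_hits))
    return finished, still_pending

# ante-search pass, then a more patient post-search retry of the leftovers:
done, pending = split_by_timeout([("fast-engine", 0.5, 120), ("slow-engine", 4.0, 7)], 2.0)
done_late, timed_out = split_by_timeout(pending, 10.0)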
if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, no_records_found=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) else: # these are the searches that actually returned results on time if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=result[0], ln=ln, of=of, req=req, limit=rg)) if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, result[0][1].name, result[1], jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) if hosted_colls_timeouts_timeouts: for timeout in hosted_colls_timeouts_timeouts: if of.startswith("h"): req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time)) req.write(print_hosted_results(url_and_engine=timeout[0], ln=ln, of=of, req=req, search_timed_out=True, limit=rg)) req.write(print_hosted_search_info(p, f, sf, so, sp, rm, of, ot, timeout[1].name, -963, jrec, rg, aas, ln, p1, p2, p3, f1, f2, f3, m1, m2, m3, op1, op2, sc, pl_in_url, d1y, d1m, d1d, d2y, d2m, d2d, dt, cpu_time, 1)) print_records_epilogue(req, of) if f == "author" and of.startswith("h"): req.write(create_similarly_named_authors_link_box(p, ln)) # log query: try: id_query = log_query(req.remote_host, req.args, uid) if of.startswith("h") and id_query: if not of in ['hcs']: # display alert/RSS teaser for non-summary formats: user_info = collect_user_info(req) display_email_alert_part = True if user_info: if user_info['email'] == 'guest': if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS > 4: display_email_alert_part = False else: if not user_info['precached_usealerts']: display_email_alert_part = False req.write(websearch_templates.tmpl_alert_rss_teaser_box_for_query(id_query, \ ln=ln, display_email_alert_part=display_email_alert_part)) except: # do not log query if req is None (used by CLI interface) pass log_query_info("ss", p, f, colls_to_search, results_final_nb_total) # External searches if of.startswith("h"): if not of in ['hcs']: perform_external_collection_search(req, cc, [p, p1, p2, p3], f, ec, verbose, ln, selected_external_collections_infos) return page_end(req, of, ln) def perform_request_cache(req, action="show"): """Manipulates the search engine cache.""" req.content_type = "text/html" req.send_http_header() req.write("") out = "" out += "
<h1>Search Cache</h1>"
    # clear cache if requested:
    if action == "clear":
        search_results_cache.clear()
    req.write(out)
    # show collection reclist cache:
    out = "<h3>Collection reclist cache</h3>"
    out += "- collection table last updated: %s" % get_table_update_time('collection')
    out += "<br />- reclist cache timestamp: %s" % collection_reclist_cache.timestamp
    out += "<br />- reclist cache contents:"
    out += "<blockquote>"
    for coll in collection_reclist_cache.cache.keys():
        if collection_reclist_cache.cache[coll]:
            out += "%s (%d)<br />" % (coll, len(collection_reclist_cache.cache[coll]))
    out += "</blockquote>"
    req.write(out)
    # show search results cache:
    out = "<h3>Search Cache</h3>"
    out += "- search cache usage: %d queries cached (max. ~%d)" % \
           (len(search_results_cache.cache), CFG_WEBSEARCH_SEARCH_CACHE_SIZE)
    if len(search_results_cache.cache):
        out += "<br />- search cache contents:"
        out += "<blockquote>"
        for query, hitset in search_results_cache.cache.items():
            out += "<br />%s ... %s" % (query, hitset)
        out += """</blockquote>
<p><a href="%s/search/cache?action=clear">clear search results cache</a></p>""" % CFG_SITE_URL
    out += "<br />"
    req.write(out)
    # show field i18nname cache:
    out = "<h3>Field I18N names cache</h3>"
    out += "- fieldname table last updated: %s" % get_table_update_time('fieldname')
    out += "<br />- i18nname cache timestamp: %s" % field_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for field in field_i18nname_cache.cache.keys():
        for ln in field_i18nname_cache.cache[field].keys():
            out += "%s, %s = %s<br />" % (field, ln, field_i18nname_cache.cache[field][ln])
    out += "</blockquote>"
    req.write(out)
    # show collection i18nname cache:
    out = "<h3>Collection I18N names cache</h3>"
    out += "- collectionname table last updated: %s" % get_table_update_time('collectionname')
    out += "<br />- i18nname cache timestamp: %s" % collection_i18nname_cache.timestamp
    out += "<br />- i18nname cache contents:"
    out += "<blockquote>"
    for coll in collection_i18nname_cache.cache.keys():
        for ln in collection_i18nname_cache.cache[coll].keys():
            out += "%s, %s = %s<br />" % (coll, ln, collection_i18nname_cache.cache[coll][ln])
    out += "</blockquote>"
    req.write(out)
    req.write("</html>")
    return "\n"

def perform_request_log(req, date=""):
    """Display search log information for given date."""
    req.content_type = "text/html"
    req.send_http_header()
    req.write("<html>")
    req.write("<h1>Search Log</h1>")
    if date: # case A: display stats for a day
        yyyymmdd = string.atoi(date)
        req.write("<p><big><strong>Date: %d</strong></big></p>" % yyyymmdd)
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("No.", "Time", "Pattern", "Field", "Collection", "Number of Hits"))
        # read file:
        p = os.popen("grep ^%d %s/search.log" % (yyyymmdd, CFG_LOGDIR), 'r')
        lines = p.readlines()
        p.close()
        # process lines:
        i = 0
        for line in lines:
            try:
                datetime, dummy_aas, p, f, c, nbhits = string.split(line, "#")
                i += 1
                req.write("<tr><td>#%d</td><td>%s:%s:%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>" \
                          % (i, datetime[8:10], datetime[10:12], datetime[12:], p, f, c, nbhits))
            except:
                pass # ignore eventual wrong log lines
        req.write("</table>")
    else: # case B: display summary stats per day
        yyyymm01 = int(time.strftime("%Y%m01", time.localtime()))
        yyyymmdd = int(time.strftime("%Y%m%d", time.localtime()))
        req.write("""<table border="1">""")
        req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Day", "Number of Queries"))
        for day in range(yyyymm01, yyyymmdd + 1):
            p = os.popen("grep -c ^%d %s/search.log" % (day, CFG_LOGDIR), 'r')
            for line in p.readlines():
                req.write("""<tr><td>%s</td><td align="right"><a href="%s/search/log?date=%d">%s</a></td></tr>""" % \
                          (day, CFG_SITE_URL, day, line))
            p.close()
        req.write("</table>")
    req.write("</html>")
    return "\n"

def get_most_popular_field_values(recids, tags, exclude_values=None, count_repetitive_values=True):
    """
    Analyze RECIDS and look for TAGS and return most popular values
    and the frequency with which they occur sorted according to
    descending frequency.

    If a value is found in EXCLUDE_VALUES, then do not count it.

    If COUNT_REPETITIVE_VALUES is True, then we count every occurrence
    of value in the tags.  If False, then we count the value only once
    regardless of the number of times it may appear in a record.
    (But, if the same value occurs in another record, we count it, of
    course.)

    Example:
     >>> get_most_popular_field_values(range(11,20), '980__a')
     (('PREPRINT', 10), ('THESIS', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'))
     (('Ellis, J', 10), ('Ellis, N', 7), ...)
     >>> get_most_popular_field_values(range(11,20), ('100__a', '700__a'), ('Ellis, J'))
     (('Ellis, N', 7), ...)
    """
    def _get_most_popular_field_values_helper_sorter(val1, val2):
        "Compare VAL1 and VAL2 according to, firstly, frequency, then secondly, alphabetically."
        compared_via_frequencies = cmp(valuefreqdict[val2], valuefreqdict[val1])
        if compared_via_frequencies == 0:
            return cmp(val1.lower(), val2.lower())
        else:
            return compared_via_frequencies
    valuefreqdict = {}
    ## sanity check:
    if not exclude_values:
        exclude_values = []
    if isinstance(tags, str):
        tags = (tags,)
    ## find values to count:
    vals_to_count = []
    displaytmp = {}
    if count_repetitive_values:
        # counting technique A: can look up many records at once: (very fast)
        for tag in tags:
            vals_to_count.extend(get_fieldvalues(recids, tag))
    else:
        # counting technique B: must count record-by-record: (slow)
        for recid in recids:
            vals_in_rec = []
            for tag in tags:
                for val in get_fieldvalues(recid, tag, False):
                    vals_in_rec.append(val)
            # do not count repetitive values within this record
            # (even across various tags, so need to unify again):
            dtmp = {}
            for val in vals_in_rec:
                dtmp[val.lower()] = 1
                displaytmp[val.lower()] = val
            vals_in_rec = dtmp.keys()
            vals_to_count.extend(vals_in_rec)
    ## are we to exclude some of found values?
    for val in vals_to_count:
        if val not in exclude_values:
            if valuefreqdict.has_key(val):
                valuefreqdict[val] += 1
            else:
                valuefreqdict[val] = 1
    ## sort by descending frequency of values:
    out = ()
    vals = valuefreqdict.keys()
    vals.sort(_get_most_popular_field_values_helper_sorter)
    for val in vals:
        tmpdisplv = ''
        if displaytmp.has_key(val):
            tmpdisplv = displaytmp[val]
        else:
            tmpdisplv = val
        out += (tmpdisplv, valuefreqdict[val]),
    return out

def profile(p="", f="", c=CFG_SITE_NAME):
    """Profile search time."""
    import profile
    import pstats
    profile.run("perform_request_search(p='%s',f='%s', c='%s')" % (p, f, c), "perform_request_search_profile")
    p = pstats.Stats("perform_request_search_profile")
    p.strip_dirs().sort_stats("cumulative").print_stats()
    return 0

diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py
index f61a788a0..89d41e32a 100644
--- a/modules/websearch/lib/websearch_webinterface.py
+++ b/modules/websearch/lib/websearch_webinterface.py
@@ -1,1606 +1,1605 @@
## This file is part of Invenio.
## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSearch URL handler.""" __revision__ = "$Id$" import cgi import os import datetime import time import sys from urllib import quote from invenio import webinterface_handler_config as apache #maximum number of collaborating authors etc shown in GUI MAX_COLLAB_LIST = 10 MAX_KEYWORD_LIST = 10 MAX_VENUE_LIST = 10 #tag constants AUTHOR_TAG = "100__a" AUTHOR_INST_TAG = "100__u" COAUTHOR_TAG = "700__a" COAUTHOR_INST_TAG = "700__u" VENUE_TAG = "909C4p" KEYWORD_TAG = "695__a" FKEYWORD_TAG = "6531_a" CFG_INSPIRE_UNWANTED_KEYWORDS_START = ['talk', 'conference', 'conference proceedings', 'numerical calculations', 'experimental results', 'review', 'bibliography', 'upper limit', 'lower limit', 'tables', 'search for', 'on-shell', 'off-shell', 'formula', 'lectures', 'book', 'thesis'] CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE = ['GeV', '(('] if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_NAME, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS, \ CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ CFG_WEBDIR, \ CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_RSS_I18N_COLLECTIONS, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_WILDCARD_LIMIT from invenio.dbquery import Error from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import redirect_to_url, make_canonical_urlargd, drop_default_urlargd +from invenio.htmlutils import get_mathjax_header from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \ collect_user_info, logoutUser, isUserSuperAdmin from invenio.websubmit_webinterface import WebInterfaceFilesPages from invenio.webcomment_webinterface import WebInterfaceCommentsPages from invenio.bibcirculation_webinterface import WebInterfaceHoldingsPages from invenio.webpage import page, pageheaderonly, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine import check_user_can_view_record, \ collection_reclist_cache, \ collection_restricted_p, \ create_similarly_named_authors_link_box, \ get_colID, \ get_coll_i18nname, \ get_fieldvalues, \ get_fieldvalues_alephseq_like, \ get_most_popular_field_values, \ get_mysql_recid_from_aleph_sysno, \ guess_primary_collection_of_a_record, \ page_end, \ page_start, \ perform_request_cache, \ perform_request_log, \ perform_request_search, \ restricted_collection_cache from invenio.access_control_engine import acc_authorize_action from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.bibformat import format_records from invenio.bibformat_engine import get_output_formats from invenio.websearch_webcoll import mymkdir, get_collection from invenio.intbitset import 
intbitset from invenio.bibupload import find_record_from_sysno from invenio.bibrank_citation_searcher import get_cited_by_list from invenio.bibrank_downloads_indexer import get_download_weight_total from invenio.search_engine_summarizer import summarize_records from invenio.errorlib import register_exception from invenio.bibedit_webinterface import WebInterfaceEditPages from invenio.bibeditmulti_webinterface import WebInterfaceMultiEditPages from invenio.bibmerge_webinterface import WebInterfaceMergePages from invenio.search_engine import get_record import invenio.template websearch_templates = invenio.template.load('websearch') search_results_default_urlargd = websearch_templates.search_results_default_urlargd search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd try: output_formats = [output_format['attrs']['code'].lower() for output_format in \ get_output_formats(with_attributes=True).values()] except KeyError: output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx'] output_formats.extend(['hm', 't', 'h']) def wash_search_urlargd(form): """ Create canonical search arguments from those passed via web form. """ argd = wash_urlargd(form, search_results_default_urlargd) if argd.has_key('as'): argd['aas'] = argd['as'] del argd['as'] # Sometimes, users pass ot=245,700 instead of # ot=245&ot=700. Normalize that. ots = [] for ot in argd['ot']: ots += ot.split(',') argd['ot'] = ots # We can either get the mode of function as # action=, or by setting action_browse or # action_search. if argd['action_browse']: argd['action'] = 'browse' elif argd['action_search']: argd['action'] = 'search' else: if argd['action'] not in ('browse', 'search'): argd['action'] = 'search' del argd['action_browse'] del argd['action_search'] return argd class WebInterfaceUnAPIPages(WebInterfaceDirectory): """ Handle /unapi set of pages.""" _exports = [''] def __call__(self, req, form): argd = wash_urlargd(form, { 'id' : (int, 0), 'format' : (str, '')}) formats_dict = get_output_formats(True) formats = {} for format in formats_dict.values(): if format['attrs']['visibility']: formats[format['attrs']['code'].lower()] = format['attrs']['content_type'] del formats_dict if argd['id'] and argd['format']: ## Translate back common format names format = { 'nlm' : 'xn', 'marcxml' : 'xm', 'dc' : 'xd', 'endnote' : 'xe', 'mods' : 'xo' }.get(argd['format'], argd['format']) if format in formats: redirect_to_url(req, '%s/record/%s/export/%s' % (CFG_SITE_URL, argd['id'], format)) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_ACCEPTABLE elif argd['id']: return websearch_templates.tmpl_unapi(formats, identifier=argd['id']) else: return websearch_templates.tmpl_unapi(formats) index = __call__ class WebInterfaceAuthorPages(WebInterfaceDirectory): """ Handle /author/Doe%2C+John page requests as well as /author/: (e.g. /author/15:Doe%2C+John) requests. The latter will try to find a person from the personid universe and will display the joint information from that particular author cluster. This interface will handle the following URLs: - /author/Doe%2C+John which will show information on the exactauthor search - /author/: (e.g. /author/15:Doe%2C+John) will try to find a person from the personid universe and will display the joint information from that particular author cluster. - /author/ (e.g. /author/152) will display the joint information from that particular author cluster (an entity called person). 
""" _exports = ['author'] def __init__(self, pageparam=''): """Constructor.""" self.pageparam = cgi.escape(pageparam.replace("+", " ")) self.personid = -1 self.authorname = " " self.person_data_available = False def _lookup(self, component, path): """This handler parses dynamic URLs (/author/John+Doe).""" return WebInterfaceAuthorPages(component), path - + def __call__(self, req, form): """Serve the page in the given language.""" is_bibauthorid = False bibauthorid_template = None personid_status_cacher = None userinfo = collect_user_info(req) metaheaderadd = "" try: from invenio.bibauthorid_webapi import search_person_ids_by_name from invenio.bibauthorid_webapi import get_papers_by_person_id from invenio.bibauthorid_webapi import get_person_names_from_id from invenio.bibauthorid_webapi import get_person_db_names_from_id from invenio.bibauthorid_webapi import get_person_id_from_canonical_id from invenio.bibauthorid_webapi import get_person_redirect_link from invenio.bibauthorid_webapi import is_valid_canonical_id from invenio.bibauthorid_webapi import get_personid_status_cacher from invenio.bibauthorid_utils import create_normalized_name from invenio.bibauthorid_utils import split_name_parts # from invenio.bibauthorid_config import CLAIMPAPER_CLAIM_OTHERS_PAPERS from invenio.bibauthorid_config import AID_ENABLED from invenio.bibauthorid_config import AID_ON_AUTHORPAGES bibauthorid_template = invenio.template.load('bibauthorid') # from invenio.access_control_admin import acc_find_user_role_actions - + if not AID_ENABLED or not AID_ON_AUTHORPAGES: is_bibauthorid = False else: is_bibauthorid = True except (ImportError): is_bibauthorid = False from operator import itemgetter argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG), 'verbose': (int, 0), 'recid': (int, -1) }) ln = argd['ln'] verbose = argd['verbose'] req.argd = argd #needed since perform_req_search param_recid = argd['recid'] bibauthorid_data = {"is_baid": is_bibauthorid, "pid": -1, "cid": ""} pubs = [] authors = [] recid = None nquery = "" names_dict = {} db_names_dict = {} _ = gettext_set_language(ln) title_message = "Author Details" #let's see what takes time.. time1 = time.time() genstart = time1 time2 = time.time() if is_bibauthorid: metaheaderadd = bibauthorid_template.tmpl_meta_includes() # Start the page in clean manner: req.content_type = "text/html" req.send_http_header() req.write(pageheaderonly(req=req, title=title_message, metaheaderadd=metaheaderadd, language=ln)) req.write(websearch_templates.tmpl_search_pagestart(ln=ln)) if is_bibauthorid: personid_status_cacher = get_personid_status_cacher() personid_status_cacher.recreate_cache_if_needed() self.person_data_available = personid_status_cacher.cache if not self.person_data_available: is_bibauthorid = False #check if it is a person id (e.g. 144): try: self.personid = int(self.pageparam) except (ValueError, TypeError): self.personid = -1 #check if it is a canonical ID (e.g. Ellis_J_1): if is_bibauthorid and is_valid_canonical_id(self.pageparam): try: self.personid = int(get_person_id_from_canonical_id(self.pageparam)) except (ValueError, TypeError): self.personid = -1 if self.personid < 0 and is_bibauthorid: if param_recid > -1: # Well, it's not a person id, did we get a record ID? recid = param_recid nquery = self.pageparam elif self.pageparam.count(":"): # No recid passed, maybe name is recid:name or name:recid pair? 
left, right = self.pageparam.split(":") try: recid = int(left) nquery = str(right) except (ValueError, TypeError): try: recid = int(right) nquery = str(left) except (ValueError, TypeError): recid = None nquery = self.pageparam else: # No recid could be determined. Work with name only nquery = self.pageparam sorted_results = search_person_ids_by_name(nquery) test_results = None if recid: for results in sorted_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) if (recid and not (str(recid) in [row[0] for row in authorpapers])): continue authors.append([results[0], results[1], authorpapers[0:4]]) test_results = authors else: test_results = [i for i in sorted_results if i[1][0][2] > .8] if len(test_results) == 1: self.personid = test_results[0][0] elif len(test_results) > 1: if bibauthorid_template and nquery: authors = [] for results in sorted_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) authors.append([results[0], results[1], authorpapers[0:4]]) srch = bibauthorid_template.tmpl_author_search body = srch(nquery, authors, author_pages_mode=True) req.write(body) return # start page # req.content_type = "text/html" # req.send_http_header() # uid = getUid(req) # page_start(req, "hb", "", "", ln, uid) if self.personid < 0 and is_bibauthorid: # Well, no person. Fall back to the exact author name search then. ptitle = '' if recid: try: - ptitle = get_record(recid)['245'][0][0][0][1] + ptitle = get_record(recid)['245'][0][0][0][1] except (IndexError,TypeError): ptitle = '"Title not available"' self.authorname = self.pageparam title = '' pmsg = '' if ptitle: pmsg = " on paper '%s'" % ptitle # We're sorry we're introducing html tags where they weren't before. XXX message = "" if CFG_INSPIRE_SITE: message += ("
<p>We are in the process of attributing papers to people so that we can "
                            "improve publication lists.</p>\n")
            message += ("<p>We have not generated the publication list for author '%s'%s. Please be patient as we "
                        "continue to match people to author names and publications. '%s' may be attributed in the next "
                        "few weeks.</p>" % (self.pageparam, pmsg, self.pageparam))
            req.write('<h1>%s</h1>' % title)
            req.write('<p>%s</p>
' % message) if not nquery: nquery = self.pageparam if not authors: authors = [] sorted_results = search_person_ids_by_name(nquery) for results in sorted_results: pid = results[0] authorpapers = get_papers_by_person_id(pid, -1) authorpapers = sorted(authorpapers, key=itemgetter(0), reverse=True) authors.append([results[0], results[1], authorpapers[0:4]]) srch = bibauthorid_template.tmpl_author_search body = srch(nquery, authors, author_pages_mode=True) req.write(body) return # return self._psearch(req, form, is_fallback=True, fallback_query=self.pageparam, fallback_title=title, fallback_message=message) elif self.personid < 0 and not is_bibauthorid: if not self.pageparam: return websearch_templates.tmpl_author_information(req, {}, self.authorname, 0, {}, {}, {}, {}, {}, {}, None, bibauthorid_data, ln) self.authorname = self.pageparam #search the publications by this author pubs = perform_request_search(req=None, p=self.authorname, f="exactauthor") names_dict[self.authorname] = len(pubs) db_names_dict[self.authorname] = len(pubs) elif is_bibauthorid and self.personid > -1: #yay! Person found! find only papers not disapproved by humans req.write("") full_pubs = get_papers_by_person_id(self.personid, -1) pubs = [int(row[0]) for row in full_pubs] longest_name = "" try: self.personid = int(self.personid) except (TypeError, ValueError): raise ValueError("Personid must be a number!") for aname, acount in get_person_names_from_id(self.personid): names_dict[aname] = acount norm_name = create_normalized_name(split_name_parts(aname)) if len(norm_name) > len(longest_name): longest_name = norm_name for aname, acount in get_person_db_names_from_id(self.personid): aname = aname.replace('"','').strip() db_names_dict[aname] = acount self.authorname = longest_name if not pubs and param_recid > -1: req.write("
<p>")
                req.write(_("We're sorry. The requested author \"%s\" seems not to be listed on the specified paper." % (self.pageparam,)))
                req.write("<br />")
                req.write(_("Please try the following link to start a broader search on the author: "))
                req.write('<a href="%s/author/%s">%s</a>' % (CFG_SITE_URL, self.pageparam, self.pageparam))
                req.write("</p>
") return page_end(req, 'hb', ln) #get most frequent authors of these pubs popular_author_tuples = get_most_popular_field_values(pubs, (AUTHOR_TAG, COAUTHOR_TAG)) coauthors = {} for (coauthor, frequency) in popular_author_tuples: if coauthor not in db_names_dict: coauthors[coauthor] = frequency if len(coauthors) > MAX_COLLAB_LIST: break time1 = time.time() if verbose == 9: req.write("
<br/>popularized authors: " + str(time1 - time2) + "<br/>
") #and publication venues venuetuples = get_most_popular_field_values(pubs, (VENUE_TAG)) time2 = time.time() if verbose == 9: req.write("
<br/>venues: " + str(time2 - time1) + "<br/>
") #and keywords kwtuples = get_most_popular_field_values(pubs, (KEYWORD_TAG, FKEYWORD_TAG), count_repetitive_values=False) if CFG_INSPIRE_SITE: # filter kw tuples against unwanted keywords: kwtuples_filtered = () for (kw, num) in kwtuples: kwlower = kw.lower() kwlower_unwanted = False for unwanted_keyword in CFG_INSPIRE_UNWANTED_KEYWORDS_START: if kwlower.startswith(unwanted_keyword): kwlower_unwanted = True # unwanted keyword found break for unwanted_keyword in CFG_INSPIRE_UNWANTED_KEYWORDS_MIDDLE: if unwanted_keyword in kwlower: kwlower_unwanted = True # unwanted keyword found break if not kwlower_unwanted: kwtuples_filtered += ((kw, num),) kwtuples = kwtuples_filtered time1 = time.time() if verbose == 9: req.write("
<br/>keywords: " + str(time1 - time2) + "<br/>
") #construct a simple list of tuples that contains keywords that appear #more than once moreover, limit the length of the list #to MAX_KEYWORD_LIST kwtuples = kwtuples[0:MAX_KEYWORD_LIST] vtuples = venuetuples[0:MAX_VENUE_LIST] time2 = time.time() if verbose == 9: req.write("
<br/>misc: " + str(time2 - time1) + "<br/>
") #a dict. keys: affiliations, values: lists of publications author_aff_pubs = self.get_institute_pub_dict(pubs, db_names_dict.keys()) time1 = time.time() if verbose == 9: req.write("
<br/>affiliations: " + str(time1 - time2) + "<br/>
") totaldownloads = 0 if CFG_BIBRANK_SHOW_DOWNLOAD_STATS: #find out how many times these records have been downloaded recsloads = {} recsloads = get_download_weight_total(recsloads, pubs) #sum up for k in recsloads.keys(): totaldownloads = totaldownloads + recsloads[k] #get cited by.. citedbylist = get_cited_by_list(pubs) person_link = None - + if (is_bibauthorid and self.personid >= 0 and "precached_viewclaimlink" in userinfo and "precached_usepaperattribution" in userinfo and "precached_usepaperclaim" in userinfo and (userinfo["precached_usepaperclaim"] or userinfo["precached_usepaperattribution"]) ): person_link = self.personid bibauthorid_data["pid"] = self.personid cid = get_person_redirect_link(self.personid) if is_valid_canonical_id(cid): person_link = cid bibauthorid_data["cid"] = cid time1 = time.time() if verbose == 9: req.write("
<br/>citedby: " + str(time1 - time2) + "<br/>
") #finally all stuff there, call the template websearch_templates.tmpl_author_information(req, pubs, self.authorname, totaldownloads, author_aff_pubs, citedbylist, kwtuples, coauthors, vtuples, db_names_dict, person_link, bibauthorid_data, ln) time1 = time.time() #cited-by summary rec_query = "" extended_author_search_str = "" if bibauthorid_data['is_baid']: if bibauthorid_data["cid"]: rec_query = 'author:"%s"' % bibauthorid_data["cid"] elif bibauthorid_data["pid"] > -1: rec_query = 'author:"%s"' % bibauthorid_data["pid"] if not rec_query: rec_query = 'exactauthor:"' + self.authorname + '"' if is_bibauthorid: if len(db_names_dict.keys()) > 1: extended_author_search_str = '(' for name_index, name_query in enumerate(db_names_dict.keys()): if name_index > 0: extended_author_search_str += " OR " extended_author_search_str += 'exactauthor:"' + name_query + '"' if len(db_names_dict.keys()) > 1: extended_author_search_str += ')' if is_bibauthorid and extended_author_search_str: rec_query = extended_author_search_str if pubs: req.write(summarize_records(intbitset(pubs), 'hcs', ln, rec_query, req=req)) time2 = time.time() if verbose == 9: req.write("
<br/>summarizer: " + str(time2 - time1) + "<br/>
") # simauthbox = create_similarly_named_authors_link_box(self.authorname) # req.write(simauthbox) if verbose == 9: req.write("
<br/>all: " + str(time.time() - genstart) + "<br/>
") - + return page_end(req, 'hb', ln) def _psearch(self, req, form, is_fallback=True, fallback_query='', fallback_title='', fallback_message=''): html = [] h = html.append if fallback_title: h('
<h1>%s</h1>' % fallback_title)
        if fallback_message:
            h('<p>%s</p>
' % fallback_message) h(' We may have \'%s\' partially matched; click here ' % (fallback_query, fallback_query)) h('to see what we have so far. (Note: this is likely to update frequently.') return "\n".join(html) def get_institute_pub_dict(self, recids, names_list): """return a dictionary consisting of institute -> list of publications""" author_aff_pubs = {} #the dictionary to be built for recid in recids: #iterate all so that we get first author's intitute #if this the first author OR #"his" institute if he is an affliate author affus = [] #list of insts from the given record mainauthors = get_fieldvalues(recid, AUTHOR_TAG) mainauthor = " " if mainauthors: mainauthor = mainauthors[0] if (mainauthor in names_list): affus = get_fieldvalues(recid, AUTHOR_INST_TAG) else: #search for coauthors.. coauthor_field_lines = [] coauthorfield_content = get_fieldvalues_alephseq_like(recid, \ COAUTHOR_TAG[:3]) if coauthorfield_content: coauthor_field_lines = coauthorfield_content.split("\n") for line in coauthor_field_lines: for name_item in names_list: breakit = False if line.count(name_item) > 0: #get affilitions .. the correct ones are $$+code code = COAUTHOR_INST_TAG[-1] myparts = line.split("$$") for part in myparts: if part and part[0] == code: myaff = part[1:] affus.append(myaff) breakit = True if breakit: break #if this is empty, add a dummy " " value if (affus == []): affus = [" "] for a in affus: #add in author_aff_pubs if (author_aff_pubs.has_key(a)): tmp = author_aff_pubs[a] tmp.append(recid) author_aff_pubs[a] = tmp else: author_aff_pubs[a] = [recid] return author_aff_pubs index = __call__ class WebInterfaceRecordPages(WebInterfaceDirectory): """ Handling of a /record/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid argd['tab'] = self.tab if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info): argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS if auth_code and user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': 
cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123 or /record/123/ index = __call__ class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory): """ Handling of a /record-restricted/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations', 'holdings', 'edit', 'keywords', 'multiedit', 'merge', 'plots'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.keywords = self self.holdings = WebInterfaceHoldingsPages(self.recid) self.citations = self self.plots = self self.export = WebInterfaceRecordExport(self.recid, self.format) self.edit = WebInterfaceEditPages(self.recid) self.merge = WebInterfaceMergePages(self.recid) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info): argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS record_primary_collection = guess_primary_collection_of_a_record(self.recid) if collection_restricted_p(record_primary_collection): (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') # Keep all the arguments, they might be reused in the # record page itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123 or /record/123/ index = __call__ class WebInterfaceSearchResultsPages(WebInterfaceDirectory): """ Handling of the /search URL and its sub-pages. """ _exports = ['', 'authenticate', 'cache', 'log'] def __call__(self, req, form): """ Perform a search. 
""" argd = wash_search_urlargd(form) _ = gettext_set_language(argd['ln']) if req.method == 'POST': raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text=_("You are not authorized to view this area."), navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass if CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL == 2: ## Let's update the current collections list with all ## the restricted collections the user has rights to view. try: restricted_collections = user_info['precached_permitted_restricted_collections'] argd_collections = set(argd['c']) argd_collections.update(restricted_collections) argd['c'] = list(argd_collections) except KeyError: pass if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info): argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS involved_collections = set() involved_collections.update(argd['c']) involved_collections.add(argd['cc']) if argd['id'] > 0: argd['recid'] = argd['id'] if argd['idb'] > 0: argd['recidb'] = argd['idb'] if argd['sysno']: tmp_recid = find_record_from_sysno(argd['sysno']) if tmp_recid: argd['recid'] = tmp_recid if argd['sysnb']: tmp_recid = find_record_from_sysno(argd['sysnb']) if tmp_recid: argd['recidb'] = tmp_recid if argd['recid'] > 0: if argd['recidb'] > argd['recid']: # Hack to check if among the restricted collections # at least a record of the range is there and # then if the user is not authorized for that # collection. recids = intbitset(xrange(argd['recid'], argd['recidb'])) restricted_collection_cache.recreate_cache_if_needed() for collname in restricted_collection_cache.cache: (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname) if auth_code and user_info['email'] == 'guest': coll_recids = get_collection(collname).reclist if coll_recids & recids: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: return page_not_authorized(req, "../", \ text=auth_msg, \ navmenuid='search') else: involved_collections.add(guess_primary_collection_of_a_record(argd['recid'])) # If any of the collection requires authentication, redirect # to the authentication form. 
        for coll in involved_collections:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie,
                                                     'ln' : argd['ln'],
                                                     'referer' : CFG_SITE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')

        # Check if the user has the right to set a high wildcard limit;
        # if not, replace the limit set by the user with the default one.
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
            if auth_code != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # Keep all the arguments; they might be reused in the
        # search_engine itself to derive other queries
        req.argd = argd

        # mod_python does not like to return [] when of=id:
        out = perform_request_search(req, **argd)
        if out == []:
            return str(out)
        else:
            return out

    def cache(self, req, form):
        """Search cache page."""
        argd = wash_urlargd(form, {'action': (str, 'show')})
        return perform_request_cache(req, action=argd['action'])

    def log(self, req, form):
        """Search log page."""
        argd = wash_urlargd(form, {'date': (str, '')})
        return perform_request_log(req, date=argd['date'])

    def authenticate(self, req, form):
        """Restricted search results pages."""

        argd = wash_search_urlargd(form)

        user_info = collect_user_info(req)
        for coll in argd['c'] + [argd['cc']]:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie,
                                                     'ln' : argd['ln'],
                                                     'referer' : CFG_SITE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')

        # Check if the user has the right to set a high wildcard limit;
        # if not, replace the limit set by the user with the default one.
        if CFG_WEBSEARCH_WILDCARD_LIMIT > 0 and (argd['wl'] > CFG_WEBSEARCH_WILDCARD_LIMIT or argd['wl'] == 0):
            auth_code, auth_message = acc_authorize_action(req, 'runbibedit')
            if auth_code != 0:
                argd['wl'] = CFG_WEBSEARCH_WILDCARD_LIMIT

        # Keep all the arguments; they might be reused in the
        # search_engine itself to derive other queries
        req.argd = argd

        uid = getUid(req)
        if uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # mod_python does not like to return [] when of=id:
        out = perform_request_search(req, **argd)
        if out == []:
            return str(out)
        else:
            return out

    index = __call__


class WebInterfaceLegacySearchPages(WebInterfaceDirectory):
    """ Handling of the /search.py URL and its sub-pages. """

    _exports = ['', ('authenticate', 'index')]

    def __call__(self, req, form):
        """ Perform a search. """

        argd = wash_search_urlargd(form)

        # We either jump into the generic search form, or the specific
        # /record/... display if a recid is requested
        if argd['recid'] != -1:
            target = '/record/%d' % argd['recid']
            del argd['recid']
        else:
            target = '/search'

        target += make_canonical_urlargd(argd, search_results_default_urlargd)
        return redirect_to_url(req, target, apache.HTTP_MOVED_PERMANENTLY)

    index = __call__


# Parameters for the legacy URLs, of the form /?c=ALEPH
legacy_collection_default_urlargd = {
    'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
    'aas': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE),
    'verbose': (int, 0),
    'c': (str, CFG_SITE_NAME)}


class WebInterfaceSearchInterfacePages(WebInterfaceDirectory):
    """ Handling of collection navigation."""

    _exports = [('index.py', 'legacy_collection'),
                ('', 'legacy_collection'),
                ('search.py', 'legacy_search'),
                'search',
                'openurl',
                'opensearchdescription',
                'logout_SSO_hook']

    search = WebInterfaceSearchResultsPages()
    legacy_search = WebInterfaceLegacySearchPages()

    def logout_SSO_hook(self, req, form):
        """Script triggered by the display of the centralized SSO logout
        dialog.  It logs the user out of Invenio and streams back the
        expected picture."""
        logoutUser(req)
        req.content_type = 'image/gif'
        req.encoding = None
        req.filename = 'wsignout.gif'
        req.headers_out["Content-Disposition"] = "inline; filename=wsignout.gif"
        req.set_content_length(os.path.getsize('%s/img/wsignout.gif' % CFG_WEBDIR))
        req.send_http_header()
        req.sendfile('%s/img/wsignout.gif' % CFG_WEBDIR)

    def _lookup(self, component, path):
        """ This handler is invoked for the dynamic URLs (for
        collections and records)"""

        if component == 'collection':
            c = '/'.join(path)

            def answer(req, form):
                """Accessing collections cached pages."""
                # Accessing collections: this is for accessing the
                # cached page on top of each collection.

                argd = wash_urlargd(form, search_interface_default_urlargd)

                # We simply return the cached page of the collection
                argd['c'] = c

                if not argd['c']:
                    # collection argument not present; display
                    # home collection by default
                    argd['c'] = CFG_SITE_NAME

                # Treat `as' argument specially:
                if argd.has_key('as'):
                    argd['aas'] = argd['as']
                    del argd['as']

                return display_collection(req, **argd)

            return answer, []

        elif component == 'record' and path and path[0] == 'merge':
            return WebInterfaceMergePages(), path[1:]

        elif component == 'record' and path and path[0] == 'edit':
            return WebInterfaceEditPages(), path[1:]

        elif component == 'record' and path and path[0] == 'multiedit':
            return WebInterfaceMultiEditPages(), path[1:]

        elif component == 'record' or component == 'record-restricted':
            try:
                if CFG_WEBSEARCH_USE_ALEPH_SYSNOS:
                    # let us try to recognize /record/<SYSNO> style of URLs:
                    x = get_mysql_recid_from_aleph_sysno(path[0])
                    if x:
                        recid = x
                    else:
                        recid = int(path[0])
                else:
                    recid = int(path[0])
            except IndexError:
                # display record #1 for URL /record without a number
                recid = 1
            except ValueError:
                if path[0] == '':
                    # display record #1 for URL /record/ without a number
                    recid = 1
                else:
                    # display page not found for URLs like /record/foo
                    return None, []

            if recid <= 0:
                # display page not found for URLs like /record/-5 or /record/0
                return None, []

            format = None
            tab = ''
            try:
                if path[1] in ['', 'files', 'reviews', 'comments', 'usage',
                               'references', 'citations', 'holdings', 'edit',
                               'keywords', 'multiedit', 'merge', 'plots']:
                    tab = path[1]
                elif path[1] == 'export':
                    tab = ''
                    format = path[2]
#                    format = None
#                elif path[1] in output_formats:
#                    tab = ''
#                    format = path[1]
                else:
                    # display page not found for URLs like /record/references
                    # for a collection where the 'references' tab is not visible
                    return None, []

            except IndexError:
                # keep the normal URL if tabs are not specified
                pass

            #if component == 'record-restricted':
                #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:]
            #else:
            return WebInterfaceRecordPages(recid, tab, format), path[1:]

        return None, []

    def openurl(self, req, form):
        """ OpenURL Handler."""
        argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args)
        ret_url = websearch_templates.tmpl_openurl2invenio(argd)
        if ret_url:
            return redirect_to_url(req, ret_url)
        else:
            return redirect_to_url(req, CFG_SITE_URL)

    def opensearchdescription(self, req, form):
        """OpenSearch description file"""
        req.content_type = "application/opensearchdescription+xml"
        req.send_http_header()
        argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG),
                                   'verbose': (int, 0)})
        return websearch_templates.tmpl_opensearch_description(ln=argd['ln'])

    def legacy_collection(self, req, form):
        """Collection URL backward compatibility handling."""
        accepted_args = dict(legacy_collection_default_urlargd)
        argd = wash_urlargd(form, accepted_args)

        # Treat `as' argument specially:
        if argd.has_key('as'):
            argd['aas'] = argd['as']
            del argd['as']

        # If we specify no collection, then we don't need to redirect
        # the user, so that accessing the site root returns the
        # default collection.
        if not form.has_key('c'):
            return display_collection(req, **argd)

        # make the collection an element of the path, and keep the
        # other query elements as is.  If the collection is CFG_SITE_NAME,
        # however, redirect to the main URL.
        c = argd['c']
        del argd['c']

        if c == CFG_SITE_NAME:
            target = '/'
        else:
            target = '/collection/' + quote(c)

        # Treat `as' argument specially:
        # We are going to redirect, so replace `aas' by the visible `as' argument:
        if argd.has_key('aas'):
            argd['as'] = argd['aas']
            del argd['aas']

        target += make_canonical_urlargd(argd, legacy_collection_default_urlargd)
        return redirect_to_url(req, target)


def display_collection(req, c, aas, verbose, ln):
    """Display search interface page for collection c by looking
    in the collection cache."""
    _ = gettext_set_language(ln)

    req.argd = drop_default_urlargd({'aas': aas, 'verbose': verbose, 'ln': ln},
                                    search_interface_default_urlargd)

    # get user ID:
    try:
        uid = getUid(req)
        user_preferences = {}
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this collection",
                navmenuid='search')
        elif uid > 0:
            user_preferences = get_user_preferences(uid)
    except Error:
        register_exception(req=req, alert_admin=True)
        return page(title=_("Internal Error"),
                    body=create_error_box(req, verbose=verbose, ln=ln),
                    description="%s - Internal Error" % CFG_SITE_NAME,
                    keywords="%s, Internal Error" % CFG_SITE_NAME,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # start display:
    req.content_type = "text/html"
    req.send_http_header()

    # deduce collection id:
    colID = get_colID(c)
    if type(colID) is not int:
        page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") %
                             ('<strong>' + str(c) + '</strong>')) + '</p>'
        page_body += '<p>' + (_("You may want to start browsing from %s.") %
                              ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' +
                               get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>'
        if req.header_only:
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        return page(title=_("Collection %s Not Found") % cgi.escape(c),
                    body=page_body,
                    description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))),
                    keywords="%s" % CFG_SITE_NAME,
                    uid=uid,
                    language=ln,
                    req=req,
                    navmenuid='search')

    # wash `aas' argument:
    if not os.path.exists("%s/collections/%d/body-as=%d-ln=%s.html" % \
                          (CFG_CACHEDIR, colID, aas, ln)):
        # nonexistent `aas' asked for, fall back to Simple Search:
        aas = 0

    # display collection interface page:
    try:
        filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, aas, ln), "r")
        c_navtrail = filedesc.read()
        filedesc.close()
    except:
        c_navtrail = ""
    try:
        filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, aas, ln), "r")
        c_body = filedesc.read()
        filedesc.close()
    except:
        c_body = ""
    try:
        filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_portalbox_tp = filedesc.read()
        filedesc.close()
    except:
        c_portalbox_tp = ""
    try:
        filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_portalbox_te = filedesc.read()
        filedesc.close()
    except:
        c_portalbox_te = ""
    try:
        filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_portalbox_lt = filedesc.read()
        filedesc.close()
    except:
        c_portalbox_lt = ""
    try:
        # show help boxes (usually located in "tr", "top right")
        # if users have not banned them in their preferences:
        c_portalbox_rt = ""
        if user_preferences.get('websearch_helpbox', 1) > 0:
            filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % \
                            (CFG_CACHEDIR, colID, ln), "r")
            c_portalbox_rt = filedesc.read()
            filedesc.close()
    except:
        c_portalbox_rt = ""
    try:
        filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % \
                        (CFG_CACHEDIR, colID, ln), "r")
        c_last_updated = filedesc.read()
        filedesc.close()
    except:
        c_last_updated = ""

    try:
        title = get_coll_i18nname(c, ln)
    except:
        title = ""

    show_title_p = True
    body_css_classes = []
    if c == CFG_SITE_NAME:
        # do not display the title on the home collection
        show_title_p = False
        body_css_classes.append('home')

    if len(collection_reclist_cache.cache.keys()) == 1:
        # if there is only one collection defined, do not print its
        # title on the page as it would be displayed repetitively.
        show_title_p = False

    if aas == -1:
        show_title_p = False

    # RSS:
    rssurl = CFG_SITE_URL + '/rss'
    rssurl_params = []
    if c != CFG_SITE_NAME:
        rssurl_params.append('cc=' + quote(c))
    if ln != CFG_SITE_LANG and \
           c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS:
        rssurl_params.append('ln=' + ln)

    if rssurl_params:
        rssurl += '?' + '&'.join(rssurl_params)
    if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS:
-        metaheaderadd = """
-<script type='text/javascript' src='/MathJax/MathJax.js'></script>
-"""
+        metaheaderadd = get_mathjax_header()
    else:
        metaheaderadd = ''

    return page(title=title,
                body=c_body,
                navtrail=c_navtrail,
                description="%s - %s" % (CFG_SITE_NAME, c),
                keywords="%s, %s" % (CFG_SITE_NAME, c),
                metaheaderadd=metaheaderadd,
                uid=uid,
                language=ln,
                req=req,
                cdspageboxlefttopadd=c_portalbox_lt,
                cdspageboxrighttopadd=c_portalbox_rt,
                titleprologue=c_portalbox_tp,
                titleepilogue=c_portalbox_te,
                lastupdated=c_last_updated,
                navmenuid='search',
                rssurl=rssurl,
                body_css_classes=body_css_classes,
                show_title_p=show_title_p)


class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory):
    """RSS 2.0 feed service pages."""

    def __call__(self, req, form):
        """RSS 2.0 feed service."""

        # Keep only interesting parameters for the search
        default_params = websearch_templates.rss_default_urlargd
        # We need to keep 'jrec' and 'rg' here in order to have
        # 'multi-page' RSS.  These parameters are not kept by default
        # as we don't want to consider them when building RSS links
        # from search and browse pages.
        default_params.update({'jrec': (int, 1),
                               'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)})
        argd = wash_urlargd(form, default_params)
        user_info = collect_user_info(req)

        for coll in argd['c'] + [argd['cc']]:
            if collection_restricted_p(coll):
                (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll)
                if auth_code and user_info['email'] == 'guest':
                    cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll})
                    target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                             make_canonical_urlargd({'action': cookie,
                                                     'ln' : argd['ln'],
                                                     'referer' : CFG_SITE_URL + req.unparsed_uri}, {})
                    return redirect_to_url(req, target, norobot=True)
                elif auth_code:
                    return page_not_authorized(req, "../", \
                        text=auth_msg, \
                        navmenuid='search')

        # Create a standard filename with these parameters
        current_url = websearch_templates.build_rss_url(argd)
        cache_filename = current_url.split('/')[-1]

        # In the same way as previously, add 'jrec' & 'rg'

        req.content_type = "application/rss+xml"
        req.send_http_header()
        try:
            # Try to read from cache
            path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
            # Check if cache needs refresh
            filedesc = open(path, "r")
            last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime)
            assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL))
            c_rss = filedesc.read()
            filedesc.close()
            req.write(c_rss)
            return
        except Exception, e:
            # do it live and cache

            previous_url = None
            if argd['jrec'] > 1:
                prev_jrec = argd['jrec'] - argd['rg']
                if prev_jrec < 1:
                    prev_jrec = 1
                previous_url = websearch_templates.build_rss_url(argd,
                                                                 jrec=prev_jrec)

            recIDs = perform_request_search(req, of="id",
                                            c=argd['c'], cc=argd['cc'],
                                            p=argd['p'], f=argd['f'],
                                            p1=argd['p1'], f1=argd['f1'],
                                            m1=argd['m1'], op1=argd['op1'],
                                            p2=argd['p2'], f2=argd['f2'],
                                            m2=argd['m2'], op2=argd['op2'],
                                            p3=argd['p3'], f3=argd['f3'],
                                            m3=argd['m3'])
            nb_found = len(recIDs)
            next_url = None
            if len(recIDs) >= argd['jrec'] + argd['rg']:
                next_url = websearch_templates.build_rss_url(argd,
                                                             jrec=(argd['jrec'] + argd['rg']))

            first_url = websearch_templates.build_rss_url(argd, jrec=1)
            last_url = websearch_templates.build_rss_url(argd, jrec=nb_found - argd['rg'] + 1)

            recIDs = recIDs[-argd['jrec']:(-argd['rg'] - argd['jrec']):-1]

            rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \
                           websearch_templates.tmpl_xml_rss_prologue(current_url=current_url,
                                                                     previous_url=previous_url,
                                                                     next_url=next_url,
                                                                     first_url=first_url,
                                                                     last_url=last_url,
                                                                     nb_found=nb_found,
                                                                     jrec=argd['jrec'],
                                                                     rg=argd['rg']) + '\n'
            req.write(rss_prologue)
            rss_body = format_records(recIDs,
                                      of='xr',
                                      ln=argd['ln'],
                                      user_info=user_info,
                                      record_separator="\n",
                                      req=req, epilogue="\n")
            rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n'
            req.write(rss_epilogue)

            # update cache
            dirname = "%s/rss" % (CFG_CACHEDIR)
            mymkdir(dirname)
            fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename)
            try:
                # Remove the file just in case it already existed
                # so that a bit of space is created
                os.remove(fullfilename)
            except OSError:
                pass

            # Check if there's enough space to cache the request.
            if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS:
                try:
                    os.umask(022)
                    f = open(fullfilename, "w")
                    f.write(rss_prologue + rss_body + rss_epilogue)
                    f.close()
                except IOError, v:
                    if v[0] == 36:
                        # URL was too long.  Never mind, don't cache.
                        pass
                    else:
                        raise

    index = __call__


class WebInterfaceRecordExport(WebInterfaceDirectory):
    """ Handling of a /record/<recid>/export/<format> URL fragment """

    _exports = output_formats

    def __init__(self, recid, format=None):
        self.recid = recid
        self.format = format

        for output_format in output_formats:
            self.__dict__[output_format] = self

        return

    def __call__(self, req, form):
        argd = wash_search_urlargd(form)
        argd['recid'] = self.recid

        if self.format is not None:
            argd['of'] = self.format

        req.argd = argd

        uid = getUid(req)
        if uid == -1:
            return page_not_authorized(req, "../",
                text="You are not authorized to view this record.",
                navmenuid='search')
        elif uid > 0:
            pref = get_user_preferences(uid)
            try:
                if not form.has_key('rg'):
                    # fetch user rg preference only if not overridden via URL
                    argd['rg'] = int(pref['websearch_group_records'])
            except (KeyError, ValueError):
                pass

        # Check if the record belongs to a restricted primary
        # collection.  If yes, redirect to the authenticated URL.
        user_info = collect_user_info(req)
        (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid)

        if argd['rg'] > CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS and not isUserSuperAdmin(user_info):
            argd['rg'] = CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS

        if auth_code and user_info['email'] == 'guest':
            cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL,
                {'collection' : guess_primary_collection_of_a_record(self.recid)})
            target = CFG_SITE_SECURE_URL + '/youraccount/login' + \
                     make_canonical_urlargd({'action': cookie,
                                             'ln' : argd['ln'],
                                             'referer' : CFG_SITE_URL + req.unparsed_uri}, {})
            return redirect_to_url(req, target, norobot=True)
        elif auth_code:
            return page_not_authorized(req, "../", \
                text=auth_msg, \
                navmenuid='search')

        # mod_python does not like to return [] when of=id:
        out = perform_request_search(req, **argd)
        if out == []:
            return str(out)
        else:
            return out

    # Return the same page whether we ask for /record/123/export/xm or /record/123/export/xm/
    index = __call__
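The get_mathjax_header() call introduced in the hunk above builds the MathJax
page header in one place for every page that needs typesetting, instead of
hard-coding a <script> snippet per caller. The helper's body is outside this
excerpt (it lives in modules/miscutil/lib/htmlutils.py); the following is only
an illustrative sketch of what such a helper could return, assuming MathJax 1.1
is served from /MathJax and that the $...$ and \(...\) TeX delimiters of the
MathJax.js config file deleted below are kept:

    def get_mathjax_header():
        """Return an HTML snippet that configures and loads MathJax.

        Illustrative sketch only: the actual implementation in
        modules/miscutil/lib/htmlutils.py may differ in its details
        (combined config name, path, extra options).
        """
        return """<script type='text/x-mathjax-config'>
    MathJax.Hub.Config({
      tex2jax: {inlineMath: [['$','$'], ['\\\\(','\\\\)']],
                processEscapes: true},
      showProcessingMessages: false,
      messageStyle: "none"
    });
    </script>
    <script src='/MathJax/MathJax.js?config=TeX-AMS_HTML' type='text/javascript'>
    </script>"""

A caller then simply passes the returned snippet as the page's metaheaderadd,
exactly as display_collection() does above.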
diff --git a/modules/webstyle/etc/Makefile.am b/modules/webstyle/etc/Makefile.am index 531caef25..6702897ef 100644 --- a/modules/webstyle/etc/Makefile.am +++ b/modules/webstyle/etc/Makefile.am @@ -1,27 +1,24 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

-mathjaxwebdir=$(localstatedir)/www/MathJax/config
-mathjaxweb_DATA=MathJax.js
-
fckeditorwebdir=$(localstatedir)/www/fckeditor
fckeditorweb_DATA=invenio-fckeditor-config.js journal-editor-config.js \
                  journal-editor-styles.xml journal-editor-templates.xml

-EXTRA_DIST = $(mathjaxweb_DATA) $(fckeditorweb_DATA)
+EXTRA_DIST = $(fckeditorweb_DATA)

CLEANFILES = *~ *.tmp
diff --git a/modules/webstyle/etc/MathJax.js b/modules/webstyle/etc/MathJax.js deleted file mode 100644 index b7eebb5a0..000000000 --- a/modules/webstyle/etc/MathJax.js +++ /dev/null @@ -1,592 +0,0 @@
-/*************************************************************
- *
- *  MathJax/config/MathJax.js
- *
- *  This configuration file is loaded when there is no explicit
- *  configuration script in the <script> tag that loads MathJax.js
-  //
-  //  would display "[math]" in place of the math until MathJax is able to typeset it.
-  //
-  preRemoveClass: "MathJax_Preview",
-
-  //
-  //  This value controls whether the "Processing Math: nn%" messages are displayed
-  //  in the lower left-hand corner.  Set to "false" to prevent those messages (though
-  //  file loading and other messages will still be shown).
-  //
-  showProcessingMessages: false,
-
-  //
-  //  This value controls the verbosity of the messages in the lower left-hand corner.
-  //  Set it to "none" to eliminate all messages, or set it to "simple" to show
-  //  "Loading..." and "Processing..." rather than showing the full file name and the
-  //  percentage of the mathematics processed.
-  //
-  messageStyle: "none",
-
-  //
-  //  These two parameters control the alignment and shifting of displayed equations.
-  //  The first can be "left", "center", or "right", and determines the alignment of
-  //  displayed equations.  When the alignment is not "center", the second determines
-  //  an indentation from the left or right side for the displayed equations.
-  //
-  displayAlign: "center",
-  displayIndent: "0em",
-
-  //
-  //  Normally MathJax will perform its startup commands (loading of
-  //  configuration, styles, jax, and so on) as soon as it can.  If you
-  //  expect to be doing additional configuration on the page, however, you
-  //  may want to have it wait until the page's onload handler is called.  If so,
-  //  set this to "onload".
-  //
-  delayStartupUntil: "none",
-
-  //
-  //  Normally MathJax will typeset the mathematics on the page as soon as
-  //  the page is loaded.  If you want to delay that process, in which case
-  //  you will need to call MathJax.Hub.Typeset() yourself by hand, set
-  //  this value to true.
-  //
-  skipStartupTypeset: false,
-
-  //============================================================================
-  //
-  //  These parameters control the tex2jax preprocessor (when you have included
-  //  "tex2jax.js" in the extensions list above).
-  //
-  tex2jax: {
-
-    //
-    //  The Id of the element to be processed (defaults to full document)
-    //
-    element: null,
-
-    //
-    //  The delimiters that surround in-line math expressions.  The first in each
-    //  pair is the initial delimiter and the second is the terminal delimiter.
-    //  Comment out any that you don't want, but be sure there is no extra
-    //  comma at the end of the last item in the list -- some browsers won't
-    //  be able to handle that.
-    //
-    inlineMath: [
-      ['$','$'],      // uncomment this for standard TeX math delimiters
-      ['\\(','\\)']
-    ],
-
-    //
-    //  The delimiters that surround displayed math expressions.  The first in each
-    //  pair is the initial delimiter and the second is the terminal delimiter.
-    //  Comment out any that you don't want, but be sure there is no extra
-    //  comma at the end of the last item in the list -- some browsers won't
-    //  be able to handle that.
-    //
-    displayMath: [
-      ['$$','$$'],
-      ['\\[','\\]']
-    ],
-
-    //
-    //  This array lists the names of the tags whose contents should not be
-    //  processed by tex2jax (other than to look for ignore/process classes
-    //  as listed below).  You can add to (or remove from) this list to prevent
-    //  MathJax from processing mathematics in specific contexts.
-    //
-    skipTags: ["script","noscript","style","textarea","pre","code"],
-
-    //
-    //  This is the class name used to mark elements whose contents should
-    //  not be processed by tex2jax (other than to look for the
-    //  processClass pattern below).  Note that this is a regular
-    //  expression, and so you need to be sure to quote any regexp special
-    //  characters.  The pattern is automatically preceded by '(^| )(' and
-    //  followed by ')( |$)', so your pattern will have to match full words
-    //  in the class name.  Assigning an element this class name will
-    //  prevent `tex2jax` from processing its contents.
-    //
-    ignoreClass: "tex2jax_ignore",
-
-    //
-    //  This is the class name used to mark elements whose contents SHOULD
-    //  be processed by tex2jax.  This is used to turn on processing within
-    //  tags that have been marked as ignored or skipped above.  Note that
-    //  this is a regular expression, and so you need to be sure to quote
-    //  any regexp special characters.  The pattern is automatically
-    //  preceded by '(^| )(' and followed by ')( |$)', so your pattern
-    //  will have to match full words in the class name.  Use this to
-    //  restart processing within an element that has been marked as
-    //  ignored above.
-    //
-    processClass: "tex2jax_process",
-
-    //
-    //  Set to "true" to allow \$ to produce a dollar without starting in-line
-    //  math mode.  If you uncomment the ['$','$'] line above, you should change
-    //  this to true so that you can insert plain dollar signs into your documents.
-    //
-    processEscapes: false,
-
-    //
-    //  Controls whether tex2jax processes LaTeX environments outside of math
-    //  mode.  Set to "false" to prevent processing of environments except within
-    //  math mode.
-    //
-    processEnvironments: true,
-
-    //
-    //  Controls whether tex2jax inserts MathJax_Preview spans to make a
-    //  preview available, and what preview to use, when it locates in-line
-    //  and display mathematics on the page.  The default is "TeX", which
-    //  means use the TeX code as the preview (until it is processed by
-    //  MathJax).  Set to "none" to prevent the previews from being
-    //  inserted (the math will simply disappear until it is typeset).  Set
-    //  to an array containing the description of an HTML snippet in order
-    //  to use the same preview for all equations on the page (e.g., you
-    //  could have it say "[math]" or load an image).
-    //
-    //  E.g.,     preview: ["[math]"],
-    //  or        preview: [["img",{src: "http://myserver.com/images/mypic.jpg"}]]
-    //
-    preview: "TeX"
-
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the mml2jax preprocessor (when you have included
-  //  "mml2jax.js" in the extensions list above).
-  //
-  mml2jax: {
-
-    //
-    //  The Id of the element to be processed (defaults to full document)
-    //
-    element: null,
-
-    //
-    //  Controls whether mml2jax inserts MathJax_Preview spans to make a
-    //  preview available, and what preview to use, when it locates
-    //  mathematics on the page.  The default is "alttext", which means use
-    //  the <math> tag's alttext attribute as the preview (until it is
-    //  processed by MathJax), if the tag has one.  Set to "none" to
-    //  prevent the previews from being inserted (the math will simply
-    //  disappear until it is typeset).  Set to an array containing the
-    //  description of an HTML snippet in order to use the same preview for
-    //  all equations on the page (e.g., you could have it say "[math]" or
-    //  load an image).
-    //
-    //  E.g.,     preview: ["[math]"],
-    //  or        preview: [["img",{src: "http://myserver.com/images/mypic.jpg"}]]
-    //
-    preview: "alttext"
-
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the jsMath2jax preprocessor (when you have included
-  //  "jsMath2jax.js" in the extensions list above).
-  //
-  jsMath2jax: {
-
-    //
-    //  The Id of the element to be processed (defaults to full document)
-    //
-    element: null,
-
-    //
-    //  Controls whether jsMath2jax inserts MathJax_Preview spans to make a
-    //  preview available, and what preview to use, when it locates
-    //  mathematics on the page.  The default is "TeX", which means use the
-    //  TeX code as the preview (until it is processed by MathJax).  Set to
-    //  "none" to prevent the previews from being inserted (the math will
-    //  simply disappear until it is typeset).  Set to an array containing
-    //  the description of an HTML snippet in order to use the same preview
-    //  for all equations on the page (e.g., you could have it say "[math]"
-    //  or load an image).
-    //
-    //  E.g.,     preview: ["[math]"],
-    //  or        preview: [["img",{src: "http://myserver.com/images/mypic.jpg"}]]
-    //
-    preview: "TeX"
-
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the TeX input jax.
-  //
-  TeX: {
-
-    //
-    //  This specifies the side on which \tag{} macros will place the tags.
-    //  Set to "left" to place on the left-hand side.
-    //
-    TagSide: "right",
-
-    //
-    //  This is the amount of indentation (from right or left) for the tags.
-    //
-    TagIndent: ".8em",
-
-    //
-    //  This is the width to use for the multline environment.
-    //
-    MultLineWidth: "85%",
-
-    //
-    //  List of macros to define.  These are of the form
-    //      name: value
-    //  where 'value' is the replacement text for the macro \name.
-    //  The 'value' can also be [value,n] where 'value' is the replacement
-    //  text and 'n' is the number of parameters for the macro.
-    //  Note that backslashes must be doubled in the replacement string.
-    //
-    //  E.g.,
-    //
-    //      Macros: {
-    //        RR: '{\\bf R}',
-    //        bold: ['{\\bf #1}', 1]
-    //      }
-    //
-    Macros: {}
-
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the MathML input jax.
-  //
-  MathML: {
-    //
-    //  This specifies whether to use TeX spacing or MathML spacing when the
-    //  HTML-CSS output jax is used.
-    //
-    useMathMLspacing: false
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the HTML-CSS output jax.
-  //
-  "HTML-CSS": {
-
-    //
-    //  This controls the global scaling of mathematics as compared to the
-    //  surrounding text.  Values between 100 and 133 are usually good choices.
-    //
-    scale: 100,
-
-    //
-    //  This is a list of the fonts to look for on a user's computer in
-    //  preference to using MathJax's web-based fonts.  These must
-    //  correspond to directories available in the jax/output/HTML-CSS/fonts
-    //  directory, where MathJax stores data about the characters available
-    //  in the fonts.  Set this to ["TeX"], for example, to prevent the
-    //  use of the STIX fonts, or set it to an empty list, [], if
-    //  you want to force MathJax to use web-based or image fonts.
-    //
-    availableFonts: ["STIX","TeX"],
-
-    //
-    //  This is the preferred font to use when more than one of those
-    //  listed above is available.
-    //
-    preferredFont: "TeX",
-
-    //
-    //  This is the web-based font to use when none of the fonts listed
-    //  above are available on the user's computer.  Note that currently
-    //  only the TeX font is available in a web-based form.  Set this to
-    //
-    //      webFont: null,
-    //
-    //  if you want to prevent the use of web-based fonts.
-    //
-    webFont: "TeX",
-
-    //
-    //  This is the font to use for image fallback mode (when none of the
-    //  fonts listed above are available and the browser doesn't support
-    //  web-fonts via the @font-face CSS directive).  Note that currently
-    //  only the TeX font is available as an image font.  Set this to
-    //
-    //      imageFont: null,
-    //
-    //  if you want to prevent the use of image fonts (e.g., you have not
-    //  installed the image fonts on your server).  In this case, only
-    //  browsers that support web-based fonts will be able to view your pages
-    //  without having the fonts installed on the client computer.  The browsers
-    //  that support web-based fonts include: IE6 and later, Chrome, Safari 3.1
-    //  and above, Firefox 3.5 and later, and Opera 10 and later.  Note that
-    //  Firefox 3.0 is NOT on this list, so without image fonts, FF3.0 users
-    //  will be required to download and install either the STIX fonts or the
-    //  MathJax TeX fonts.
-    //
-    imageFont: "TeX",
-
-    //
-    //  This controls whether the MathJax contextual menu will be available
-    //  on the mathematics in the page.  If true, then right-clicking (on
-    //  the PC) or control-clicking (on the Mac) will produce a MathJax
-    //  menu that allows you to get the source of the mathematics in
-    //  various formats, change the size of the mathematics relative to the
-    //  surrounding text, and get information about MathJax.
-    //
-    //  Set this to false to disable the menu.  When true, the MathMenu
-    //  items below configure the actions of the menu.
-    //
-    showMathMenu: true,
-
-    //
-    //  This allows you to define or modify the styles used to display
-    //  various math elements created by MathJax.
-    //
-    //  Example:
-    //      styles: {
-    //        ".MathJax_Preview": {
-    //          "font-size": "80%",   // preview uses a smaller font
-    //          color: "red"          // and is in red
-    //        }
-    //      }
-    //
-    styles: {},
-
-    //
-    //  Configuration for tooltips
-    //  (see also the #MathJax_Tooltip CSS in MathJax/jax/output/HTML-CSS/config.js,
-    //  which can be overridden using the styles values above).
-    //
-    tooltip: {
-      delayPost: 600,          // milliseconds delay before tooltip is posted after mouseover
-      delayClear: 600,         // milliseconds delay before tooltip is cleared after mouseout
-      offsetX: 10, offsetY: 5  // pixels to offset tooltip from mouse position
-    }
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the NativeMML output jax.
-  //
-  NativeMML: {
-
-    //
-    //  This controls the global scaling of mathematics as compared to the
-    //  surrounding text.  Values between 100 and 133 are usually good choices.
-    //
-    scale: 100,
-
-    //
-    //  This controls whether the MathJax contextual menu will be available
-    //  on the mathematics in the page.  If true, then right-clicking (on
-    //  the PC) or control-clicking (on the Mac) will produce a MathJax
-    //  menu that allows you to get the source of the mathematics in
-    //  various formats, change the size of the mathematics relative to the
-    //  surrounding text, and get information about MathJax.
-    //
-    //  Set this to false to disable the menu.  When true, the MathMenu
-    //  items below configure the actions of the menu.
-    //
-    //  There is a separate setting for MSIE, since the code to handle that
-    //  is a bit delicate; if it turns out to have unexpected consequences,
-    //  you can turn it off without turning off other browser support.
-    //
-    showMathMenu: true,
-    showMathMenuMSIE: true,
-
-    //
-    //  This allows you to define or modify the styles used to display
-    //  various math elements created by MathJax.
-    //
-    //  Example:
-    //      styles: {
-    //        ".MathJax_MathML": {
-    //          color: "red"   // MathML is in red
-    //        }
-    //      }
-    //
-    styles: {}
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the contextual menus that are available on the
-  //  mathematics within the page (provided the showMathMenu value is true above).
-  //
-  MathMenu: {
-    //
-    //  This is the hover delay for the display of submenus in the
-    //  contextual menu.  When the mouse is still over a submenu label for
-    //  this long, the menu will appear.  (The menu also will appear if you
-    //  click on the label.)  It is in milliseconds.
-    //
-    delay: 400,
-
-    //
-    //  This is the URL for the MathJax Help menu item.
-    //
-    helpURL: "http://www.mathjax.org/help/user/",
-
-    //
-    //  These control whether the "Math Renderer", "Font Preferences",
-    //  and "Contextual Menu" submenus will be displayed or not.
-    //
-    showRenderer: true,
-    showFontMenu: false,
-    showContext: false,
-
-    //
-    //  These are the settings for the Show Source window.  The initial
-    //  width and height will be reset after the source is shown in an
-    //  attempt to make the window fit the output better.
-    //
-    windowSettings: {
-      status: "no", toolbar: "no", locationbar: "no", menubar: "no",
-      directories: "no", personalbar: "no", resizable: "yes", scrollbars: "yes",
-      width: 100, height: 50
-    },
-
-    //
-    //  This allows you to change the CSS that controls the menu
-    //  appearance.  See the extensions/MathMenu.js file for details
-    //  of the default settings.
-    //
-    styles: {}
-
-  },
-
-  //============================================================================
-  //
-  //  These parameters control the MMLorHTML configuration file.
-  //  NOTE: if you add MMLorHTML.js to the config array above,
-  //  you must REMOVE the output jax from the jax array.
-  //
-  MMLorHTML: {
-    //
-    //  The output jax that is to be preferred when both are possible
-    //  (set to "MML" for native MathML, "HTML" for MathJax's HTML-CSS output jax).
-    //
-    prefer: {
-      MSIE: "MML",
-      Firefox: "MML",
-      Opera: "HTML",
-      other: "HTML"
-    }
-  }
-});
-
-MathJax.Ajax.loadComplete("[MathJax]/config/MathJax.js");