diff --git a/config/invenio.conf b/config/invenio.conf index fe7d12fdf..4cf7a5040 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,837 +1,837 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of a CDS Invenio installation, as coming from the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/cds-invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs. ## ## However, you can also create a file named 'invenio-local.conf' in ## the same directory where 'invenio.conf' lives and put there only ## the localizations you need to have different from the default ones. ## For example: ## ## $ cat /opt/cds-invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have set in your ## local file. This cascading of configuration parameters will ease ## you future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential CDS Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local CDS Invenio administrator. ## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_NAME = cdsinvenio CFG_DATABASE_USER = cdsinvenio CFG_DATABASE_PASS = my123p$ss ## CFG_SITE_URL - specify URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not leave ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify secure URL under which your ## installation secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not leave ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your CDS Invenio installation. CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.) CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Атлантис Институт фиктивных Наук CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 CFG_SITE_NAME_INTL_hu = Atlantis Institute of Fictive Science ## CFG_SITE_LANG -- the default language of the interface: CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. Currently supported languages ## include Bulgarian, Catalan, Czech, German, Greek, English, Spanish, ## French, Croatian, Hungarian, Italian, Japanese, Norwegian, Polish, ## Portuguese, Russian, Slovak, Swedish, Ukrainian, Chinese (China), ## Chinese (Taiwan), so that the current eventual maximum you can ## currently select is ## "bg,ca,cs,de,el,en,es,fr,hr,it,hu,ja,no,pl,pt,ru,sk,sv,uk,zh_CN,zh_TW". CFG_SITE_LANGS = bg,ca,cs,de,el,en,es,fr,hr,it,hu,ja,no,pl,pt,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = cds.support@cern.ch ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and login with ## this address when using CDS Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = cds.support@cern.ch ## CFG_CERN_SITE -- do we want to enable CERN-specific code? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_INSPIRE_SITE = 0 ## CFG_DEVEL_SITE -- is this a development site? If it is, you might ## prefer that it doesn't do certain things. For example, you might ## not want WebSubmit to send certain emails or trigger certain ## processes on a development site. ## Put "1" for "yes" (this is a development site) or "0" for "no" ## (this isn't a development site.) CFG_DEVEL_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important one is ## the 'template skin' you would like to use. Please refer to the ## WebStyle Admin Guide for more explanation. The other variables are ## listed here mostly for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML ## left top box: CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global ## HTML left bottom box: CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global ## HTML right top box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global ## HTML right bottom box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ################################## ## Part 3: WebSearch parameters ## ################################## ## This section contains some configuration parameters for WebSearch ## module. Please note that WebSearch is mostly configured on ## run-time via its WebSearch Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. (Note that you may modify them ## afterwards too, though.) ## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries we want to ## cache in memory per one Apache httpd process? This cache is used ## mainly for "next/previous page" functionality, but it caches also ## "popular" user queries if more than one user happen to search for ## the same thing. Note that large numbers may lead to great memory ## consumption. We recommend a value not greater than 100. CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 100 ## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older ## system, you may want to map field codes of your old system (such as ## 'ti') to CDS Invenio/MySQL ("title"). Use Python dictionary syntax ## for the translation table, e.g. {'wau':'author', 'wti':'title'}. ## Usually you don't want to do that, and you would use empty dict {}. CFG_WEBSEARCH_FIELDS_CONVERT = {} ## CFG_WEBSEARCH_SUPERSIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the super-simple search interface, in ## characters. CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 60 CFG_WEBSEARCH_SUPERSIMPLESEARCH_PATTERN_BOX_WIDTH = 60 ## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search ## pattern window in the simple search interface, in characters. CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40 ## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the advanced search interface, in ## characters. CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30 ## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still ## want to sort? For higher numbers we print only a warning and won't ## perform any sorting other than default 'latest records first', as ## sorting would be very time consuming then. We recommend a value of ## not more than a couple of thousands. CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000 ## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but ## it was not preformatted in the "HTML brief" format, do we want to ## call BibFormatting on the fly? Put "1" for "yes" and "0" for "no". ## Note that "1" will display the record exactly as if it were fully ## preformatted, but it may be slow due to on-the-fly processing; "0" ## will display a default format very fast, but it may not have all ## the fields as in the fully preformatted HTML brief format. Note ## also that this option is active only for old (PHP) formats; the new ## (Python) formats are called on the fly by default anyway, since ## they are much faster. When usure, please set "0" here. CFG_WEBSEARCH_CALL_BIBFORMAT = 0 ## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs ## visible rather than MySQL's record IDs? You may use this if you ## migrate from a different e-doc system, and you store your old ## system numbers into 970__a. Put "1" for "yes" and "0" for ## "no". Usually you don't want to do that, though. CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0 ## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- Put "1" if you want the ## "Latest Additions" in the web collection pages to show ## internationalized records. Useful only if your brief BibFormat ## templates contains internationalized strings. Otherwise put "0" in ## order not to slow down the creation of latest additions by WebColl. CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0 ## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display ## under 'Latest Additions' in the web collection pages. CFG_WEBSEARCH_INSTANT_BROWSE = 10 ## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to ## display in the RSS feed. CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25 ## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long ## a feed cache is valid. CFG_WEBSEARCH_RSS_TTL = 360 ## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of request kept ## in cache. If the cache is filled, following request are not cached. CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000 ## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names ## to print explicitely; for more print "et al". Note that this is ## used in default formatting that is seldomly used, as usually ## BibFormat defines all the format. The value below is only used ## when BibFormat fails, for example. CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3 ## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show or ## not collection grandsons in Narrow Search boxes (sons are shown by ## default, grandsons are configurable here). Use 0 for no and 1 for ## yes. CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1 ## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we ## create help links for Ellis, Nick or Ellis, Nicholas and friends ## when Ellis, N was searched for? Useful if you have one author ## stored in the database under several name formats, namely surname ## comma firstname and surname comma initial cataloging policy. Use 0 ## for no and 1 for yes. CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1 ## CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS -- jsMath is a JavaScript ## library that renders (La)TeX mathematical formulas in the client ## browser. This parameter must contain a comma-separated list of ## output formats for which to apply the jsMath rendering, for example ## "hb,hd". If the list is empty, jsMath is disabled. CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS = ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching ## external collections (e.g. SPIRES, CiteSeer, etc), how many seconds ## do we wait for reply before abandonning? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5 ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many ## results do we fetch? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10 ## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search ## results by collection or not? Use 0 for not, 1 for yes. CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1 -## CFG_ENABLED_SEARCH_INTERFACES -- which search modes are enabled? +## CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES -- which search modes are enabled? ## Choose among: ## -1 : super-simple search mode (search field + search button) !EXPERIMENTAL! ## 0 : simple search mode (same as above + browse button + fields list ## + more options) ## 1 : advanced search mode (same as above + 3 search fields + choice of ## matching types + combining operators) ## Note that by default the search interface will allow to switch ## between 2 modes only. To switch between more modes would require ## you to modify the WebSearch templates. Typically you would ## therefore choose modes -1,1 or 0,1 or -1,0 as combinations. ## If more than 2 modes are specified, switching between "basic" and ## "advanced" modes in the search interface will switch between ## minimum and maximum values. Comma-separated list of modes -CFG_ENABLED_SEARCH_INTERFACES = -1,1 +CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = 0,1 -## CFG_DEFAULT_SEARCH_INTERFACE -- which search mode is enabled by +## CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE -- which search mode is enabled by ## default? One of the values specified for -## CFG_ENABLED_SEARCH_INTERFACES. -CFG_DEFAULT_SEARCH_INTERFACE = -1 +## CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES. +CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE = 0 ####################################### ## Part 4: BibHarvest OAI parameters ## ####################################### ## This part defines parameters for the CDS Invenio OAI gateway. ## Useful if you are running CDS Invenio as OAI data provider. ## CFG_OAI_ID_FIELD -- OAI identifier MARC field: CFG_OAI_ID_FIELD = 909COo ## CFG_OAI_SET_FIELD -- OAI set MARC field: CFG_OAI_SET_FIELD = 909COp ## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy ## (no/transient/persistent). CFG_OAI_DELETED_POLICY = no ## CFG_OAI_ID_PREFIX -- OAI identifier prefix: CFG_OAI_ID_PREFIX = atlantis.cern.ch ## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier: CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:CERN-TH-4036 ## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb: CFG_OAI_IDENTIFY_DESCRIPTION = oai atlantis.cern.ch : oai:atlantis.cern.ch:CERN-TH-4036 http://atlantis.cern.ch/ Free and unlimited use by anybody with obligation to refer to original record Full content, i.e. preprints may not be harvested by robots Submission restricted. Submitted documents are subject of approval by OAI repository admins. ## CFG_OAI_LOAD -- OAI number of records in a response: CFG_OAI_LOAD = 1000 ## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time: CFG_OAI_EXPIRE = 90000 ## CFG_OAI_SLEEP -- service unavailable between two consecutive ## requests for CFG_OAI_SLEEP seconds: CFG_OAI_SLEEP = 10 ################################## ## Part 5: WebSubmit parameters ## ################################## ## This section contains some configuration parameters for WebSubmit ## module. Please note that WebSubmit is mostly configured on ## run-time via its WebSubmit Admin web interface. The parameters ## below are the ones that you do not probably want to modify during ## the runtime. ## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext ## documents are stored under "/opt/cds-invenio/var/data/files/gX/Y" ## directories where X is 0,1,... and Y stands for bibdoc ID. Thusly ## documents Y are grouped into directories X and this variable ## indicates the maximum number of documents Y stored in each ## directory X. This limit is imposed solely for filesystem ## performance reasons in order not to have too many subdirectories in ## a given directory. CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000 ## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated ## list of document extensions not listed in Python standard mimetype ## library that should be recognized by Invenio. CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg ################################# ## Part 6: BibIndex parameters ## ################################# ## This section contains some configuration parameters for BibIndex ## module. Please note that BibIndex is mostly configured on run-time ## via its BibIndex Admin web interface. The parameters below are the ## ones that you do not probably want to modify very often during the ## runtime. ## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do ## you want to index locally stored files only, or also external URLs? ## Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 0 ## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove ## stopwords? Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_REMOVE_STOPWORDS = 0 ## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as ## alphanumeric separators of word-blocks inside words. You probably ## don't want to change this. CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ ## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation ## between word-blocks inside words. You probably don't want to ## change this. CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\" ## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup ## before indexing? Use 1 if you have HTML markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0 ## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LATEX markup ## before indexing? Use 1 if you have LATEX markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0 ## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to ## index. The terms smaller then this amount will be discarded. ## Useful to keep the database clean, however you can safely leave ## this value on 0 for up to 1,000,000 documents. CFG_BIBINDEX_MIN_WORD_LENGTH = 0 ## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD -- ## access credentials to access restricted URLs, interesting only if ## you are fulltext-indexing files located on a remote server that is ## only available via username/password. But it's probably better to ## handle this case via IP or some convention; the current scheme is ## mostly there for demo only. CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass ## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- ## Enable sanity checks for integers passed to the intbitset data ## structures. It is good to enable this during debugging ## and to disable this value for speed improvements. CFG_INTBITSET_ENABLE_SANITY_CHECKS = False ####################################### ## Part 7: Access control parameters ## ####################################### ## This section contains some configuration parameters for the access ## control system. Please note that WebAccess is mostly configured on ## run-time via its WebAccess Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. (If you do want to modify them during ## runtime, for example te deny access temporarily because of backups, ## you can edit access_control_config.py directly, no need to get back ## here and no need to redo the make process.) ## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is. ## Use 0 for normal operation of the site, 1 for read-only site (all ## write operations temporarily closed), 2 for site fully closed. ## Useful for site maintenance. CFG_ACCESS_CONTROL_LEVEL_SITE = 0 ## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy. Use ## 0 to allow guest users, 1 not to allow them (all users must login). CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0 ## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and ## activation policy. When 0, users can register and accounts are ## automatically activated. When 1, users can register but admin must ## activate the accounts. When 2, users cannot register nor update ## their email address, only admin can register accounts. When 3, ## users cannot register nor update email address nor password, only ## admin can register accounts. When 4, the same as 3 applies, nor ## user cannot change his login method. CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account ## registration to certain email addresses? If wanted, give domain ## name below, e.g. "cern.ch". If not wanted, leave it empty. CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a ## notification email to the administrator when a new account is ## created? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a ## notification email to the user when a new account is created in order to ## to verify the validity of the provided email address? Use ## 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a ## notification email to the user when a new account is activated? ## Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a ## notification email to the user when a new account is deleted or ## account demand rejected? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0 ## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials ## are stored. Must be an absolute pathname. If the value does not ## start by a slash, it is considered to be the filename of a file ## located under prefix/var/tmp directory. This is useful for the ## demo site testing purposes. For the production site, if you plan ## to restrict access to some collections based on the Apache user ## authentication mechanism, you should put here an absolute path to ## your Apache password file. CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords ## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are ## defined. See the documentation of the preceding config variable. CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups ################################### ## Part 8: WebSession parameters ## ################################### ## This section contains some configuration parameters for tweaking ## session handling. ## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a session ## and the corresponding cookie is considered expired. CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2 ## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which a session ## and the corresponding cookie is considered expired, when the user has ## requested to permanently stay logged in. CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365 ## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when user requested ## a password reset, for how many days is the URL valid? CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account ## activation email was sent, for how many days is the URL valid? CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- when ## user won't confirm his email address and not complete ## registeration, after how many days will it expire? CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10 ## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the session ## system allocates the same uid=0 to all guests users regardless of where they ## come from. 0 allocate a unique uid to each guest. CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0 ################################ ## Part 9: BibRank parameters ## ################################ ## This section contains some configuration parameters for the ranking ## system. ## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading ## similarity stats? ('People who viewed this page also viewed') CFG_BIBRANK_SHOW_READING_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download ## similarity stats? ('People who downloaded this document also ## downloaded') CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show download ## history graph? CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we ## want to show a graph representing the distribution of client IPs ## downloading given document? CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0 ## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited ## by' links? (useful only when you have citations in the metadata) CFG_BIBRANK_SHOW_CITATION_LINKS = 1 ## CFG_BIBRANK_SHOW_CITATION_STATS -- de we want to show citation ## stats? ('Cited by M recors', 'Co-cited with N records') CFG_BIBRANK_SHOW_CITATION_STATS = 1 ## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show citation ## history graph? CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1 #################################### ## Part 10: WebComment parameters ## #################################### ## This section contains some configuration parameters for the ## commenting and reviewing facilities. ## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users write ## public comments on records? CFG_WEBCOMMENT_ALLOW_COMMENTS = 1 ## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users write ## public reviews of records? CFG_WEBCOMMENT_ALLOW_REVIEWS = 1 ## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short ## reviews, that is just the attribution of stars without submitting ## detailed review text? CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0 ## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users ## report a comment to be abusive, how many they have to be before the ## site admin is alerted? CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5 ## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site ## admin after every comment? CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple comment submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple review submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20 ################################## ## Part 11: BibSched parameters ## ################################## ## This section contains some configuration parameters for the ## bibliographic task scheduler. ## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh ## bibsched monitor? (in seconds) CFG_BIBSCHED_REFRESHTIME = 5 ## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task ## logs? CFG_BIBSCHED_LOG_PAGER = /bin/more ## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to perform the ## gargbage collector of BibSched queue (i.e. removing/moving task to archive). CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30 ## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTask that can be safely ## removed from the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc ## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be safely ## archived out of the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oaiarchive ## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of BibTasks ## that can run concurrently. CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1 ################################### ## Part 12: WebBasket parameters ## ################################### ## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit for ## a maximum number of displayed baskets CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20 ################################## ## Part 13: WebAlert parameters ## ################################## ## This section contains some configuration parameters for the ## automatic email notification alert system. ## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBALERT_ALERT_ENGINE_EMAIL = cds.alert@cdsdev.cern.ch ## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records ## at most do we send in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20 ## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of ## chars per line in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72 ## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert ## emails fails, how many times we retry? CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3 ## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending ## alert emails fails, what is the sleeptime between tries? (in ## seconds) CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300 #################################### ## Part 14: WebMessage parameters ## #################################### ## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large web messages do we ## allow? CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000 ## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages for a ## regular user do we allow in its inbox? CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30 ## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- how many days before ## we delete orphaned messages? CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60 ################################## ## Part 15: MiscUtil parameters ## ################################## ## CFG_MISCUTIL_SQL_MAX_CACHED_QUERIES -- maximum number of cached SQL ## queries possible. After reaching this number the cache is pruned ## by deleting half of the older queries. CFG_MISCUTIL_SQL_MAX_CACHED_QUERIES = 10000 ## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool ## in the DB engine of CDS Invenio. It is okay to enable this flag ## even if you have not installed SQLAlchemy. Note that Invenio will ## loose some perfomance if this option is enabled. CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False ## CFG_MISCUTIL_SMTP_HOST -- which server to use as outgoing mail server to ## send outgoing emails generated by the system, for example concerning ## submissions or email notification alerts. CFG_MISCUTIL_SMTP_HOST = localhost ## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server ## defined in the previous step. CFG_MISCUTIL_SMTP_PORT = 25 ################################# ## Part 16: BibEdit parameters ## ################################# ## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is ## locked to prevent other users to edit it at the same time. After ## how many seconds the locked record will be again free for other ## people to edit? CFG_BIBEDIT_TIMEOUT = 3600 ## CFG_BIBEDIT_LOCKLEVEL -- when a user tries to edit a record being edited by ## another user, the lock level determines when it is permitted to do so. ## Level 0 - permits editing if there are no recent edit sessions in tmp directory ## (unsafe, use only if you know what you are doing) ## Level 1 - permits editing if there are no queued bibedit tasks for this record ## (safe with respect to bibedit, but not for other bibupload maintenance jobs) ## Level 2 - permits editing if there are no queued bibupload tasks of any sort ## (safe, but may lock more than necessary if many cataloguers around) ## Level 3 - permits editing if no queued bibupload task concerns given record ## (safe, most precise locking, but slow, checks for 001/035/970) ## The recommended level is 3 (default) or 2 (if you use maintenance jobs often). CFG_BIBEDIT_LOCKLEVEL = 3 ################################### ## Part 17: BibUpload parameters ## ################################### ## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references? CFG_BIBUPLOAD_REFERENCE_TAG = 999 ## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external ## system numbers? Useful for matching when our records come from an ## external digital library system. CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store OAI ID tags ## of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI ID stored in this tag (kind of like ## external system number too). CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a ## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that ## are strong enough to resist the replace mode. Useful for tags that ## might be created from an external non-metadata-like source, ## e.g. the information about the number of copies left. CFG_BIBUPLOAD_STRONG_TAGS = 964 ## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of system ## paths from which it is allowed to take fulltextes that will be uploaded via ## FFT (CFG_TMPDIR is included by default). CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home ########################## ## THAT's ALL, FOLKS! ## ########################## diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py index 8801daaa5..bda019fcd 100644 --- a/modules/miscutil/lib/inveniocfg.py +++ b/modules/miscutil/lib/inveniocfg.py @@ -1,1006 +1,1006 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio configuration and administration CLI tool. Usage: inveniocfg [options] General options: -h, --help print this help -V, --version print version number Options to finish your installation: --create-apache-conf create Apache configuration files --create-tables create DB tables for Invenio --drop-tables drop DB tables of Invenio Options to set up and test a demo site: --create-demo-site create demo site --load-demo-records load demo records --remove-demo-records remove demo records, keeping demo site --drop-demo-site drop demo site configurations too --run-unit-tests run unit test suite (needs deme site) --run-regression-tests run regression test suite (needs demo site) --run-web-tests run web tests in a browser (needs demo site, Firefox, Selenium IDE) Options to update config files in situ: --update-all perform all the update options --update-config-py update config.py file from invenio.conf file --update-dbquery-py update dbquery.py with DB credentials from invenio.conf --update-dbexec update dbexec with DB credentials from invenio.conf --update-bibconvert-tpl update bibconvert templates with CFG_SITE_URL from invenio.conf --update-web-tests update web test cases with CFG_SITE_URL from invenio.conf Options to update DB tables: --reset-all perform all the reset options --reset-sitename reset tables to take account of new CFG_SITE_NAME* --reset-siteadminemail reset tables to take account of new CFG_SITE_ADMIN_EMAIL --reset-fieldnames reset tables to take account of new I18N names from PO files Options to help the work: --list print names and values of all options from conf files --get get value of a given option from conf files --conf-dir path to directory where invenio*.conf files are [optional] --detect-system-details print system details such as Apache/Python/MySQL versions """ __revision__ = "$Id$" from ConfigParser import ConfigParser import os import re import shutil import socket import sys def print_usage(): """Print help.""" print __doc__ def print_version(): """Print version information.""" print __revision__ def convert_conf_option(option_name, option_value): """ Convert conf option into Python config.py line, converting values to ints or strings as appropriate. """ ## 1) convert option name to uppercase: option_name = option_name.upper() ## 2) convert option value to int or string: if option_name in ['CFG_BIBUPLOAD_REFERENCE_TAG', 'CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG', 'CFG_BIBUPLOAD_STRONG_TAGS']: # some options are supposed be string even when they look like # numeric option_value = '"' + option_value + '"' else: try: option_value = int(option_value) except ValueError: option_value = '"' + option_value + '"' ## 3a) special cases: regexps if option_name in ['CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS', 'CFG_BIBINDEX_CHARS_PUNCTUATION']: option_value = 'r"[' + option_value[1:-1] + ']"' ## 3b) special cases: True, False, None if option_value in ['"True"', '"False"', '"None"']: option_value = option_value[1:-1] ## 3c) special cases: dicts if option_name in ['CFG_WEBSEARCH_FIELDS_CONVERT', ]: option_value = option_value[1:-1] ## 3d) special cases: comma-separated lists if option_name in ['CFG_SITE_LANGS', 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', 'CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBSCHED_GC_TASKS_TO_REMOVE', 'CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE', 'CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS', - 'CFG_ENABLED_SEARCH_INTERFACES']: + 'CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES']: out = "[" for elem in option_value[1:-1].split(","): if elem: - if option_name in ['CFG_ENABLED_SEARCH_INTERFACES']: + if option_name in ['CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES']: # 3d1) integer values out += "%i, " % int(elem) else: # 3d2) string values out += "'%s', " % elem out += "]" option_value = out ## 3e) special cases: multiline if option_name == 'CFG_OAI_IDENTIFY_DESCRIPTION': # make triple quotes option_value = '""' + option_value + '""' ## 3f) ignore some options: if option_name == 'CFG_SITE_NAME_INTL': # treated elsewhere return ## 4) finally, return output line: return '%s = %s' % (option_name, option_value) def cli_cmd_update_config_py(conf): """ Update new config.py from conf options, keeping previous config.py in a backup copy. """ print ">>> Going to update config.py..." ## location where config.py is: configpyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'config.py' ## backup current config.py file: if os.path.exists(configpyfile): shutil.copy(configpyfile, configpyfile + '.OLD') ## here we go: fdesc = open(configpyfile, 'w') ## generate preamble: fdesc.write("# -*- coding: utf-8 -*-\n") fdesc.write("# DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED\n") fdesc.write("# FROM INVENIO.CONF BY EXECUTING:\n") fdesc.write("# " + " ".join(sys.argv) + "\n") ## special treatment for CFG_SITE_NAME_INTL options: fdesc.write("CFG_SITE_NAME_INTL = {}\n") for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): fdesc.write("CFG_SITE_NAME_INTL['%s'] = \"%s\"\n" % (lang, conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang))) ## special treatment for CFG_SITE_SECURE_URL that may be empty, in ## which case it should be put equal to CFG_SITE_URL: if not conf.get("Invenio", "CFG_SITE_SECURE_URL"): conf.set("Invenio", "CFG_SITE_SECURE_URL", conf.get("Invenio", "CFG_SITE_URL")) ## process all the options normally: for section in conf.sections(): for option in conf.options(section): if not option.startswith('CFG_DATABASE_'): # put all options except for db credentials into config.py line_out = convert_conf_option(option, conf.get(section, option)) if line_out: fdesc.write(line_out + "\n") ## generate postamble: fdesc.write("") fdesc.write("# END OF GENERATED FILE") ## we are done: fdesc.close() print "You may want to restart Apache now." print ">>> config.py updated successfully." def cli_cmd_update_dbquery_py(conf): """ Update lib/dbquery.py file with DB parameters read from conf file. Note: this edits dbquery.py in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbquery.py..." ## location where dbquery.py is: dbquerypyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'dbquery.py' ## backup current dbquery.py file: if os.path.exists(dbquerypyfile): shutil.copy(dbquerypyfile, dbquerypyfile + '.OLD') ## replace db parameters: out = '' for line in open(dbquerypyfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get('Invenio', dbparam)) else: out += line fdesc = open(dbquerypyfile, 'w') fdesc.write(out) fdesc.close() print "You may want to restart Apache now." print ">>> dbquery.py updated successfully." def cli_cmd_update_dbexec(conf): """ Update bin/dbexec file with DB parameters read from conf file. Note: this edits dbexec in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbexec..." ## location where dbexec is: dbexecfile = conf.get("Invenio", "CFG_BINDIR") + \ os.sep + 'dbexec' ## backup current dbexec file: if os.path.exists(dbexecfile): shutil.copy(dbexecfile, dbexecfile + '.OLD') ## replace db parameters via sed: out = '' for line in open(dbexecfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get("Invenio", dbparam)) else: out += line fdesc = open(dbexecfile, 'w') fdesc.write(out) fdesc.close() print ">>> dbexec updated successfully." def cli_cmd_update_bibconvert_tpl(conf): """ Update bibconvert/config/*.tpl files looking for 856 http://.../record/ lines, replacing URL with CFG_SITE_URL taken from conf file. Note: this edits tpl files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update bibconvert templates..." ## location where bibconvert/config/*.tpl are: tpldir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'bibconvert' + os.sep + 'config' ## find all *.tpl files: for tplfilename in os.listdir(tpldir): if tplfilename.endswith(".tpl"): ## change tpl file: tplfile = tpldir + os.sep + tplfilename shutil.copy(tplfile, tplfile + '.OLD') out = '' for line in open(tplfile, 'r').readlines(): match = re.search(r'^(.*)http://.*?/record/(.*)$', line) if match: out += "%s%s/record/%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), match.group(2)) else: out += line fdesc = open(tplfile, 'w') fdesc.write(out) fdesc.close() print ">>> bibconvert templates updated successfully." def cli_cmd_update_web_tests(conf): """ Update web test cases lib/webtest/test_*.html looking for http://.+?[>> Going to update web tests..." ## location where test_*.html files are: testdir = conf.get("Invenio", 'CFG_PREFIX') + os.sep + \ 'lib' + os.sep + 'webtest' + os.sep + 'invenio' ## find all test_*.html files: for testfilename in os.listdir(testdir): if testfilename.startswith("test_") and \ testfilename.endswith(".html"): ## change test file: testfile = testdir + os.sep + testfilename shutil.copy(testfile, testfile + '.OLD') out = '' for line in open(testfile, 'r').readlines(): match = re.search(r'^(.*)http://.+?([>> web tests updated successfully." def cli_cmd_reset_sitename(conf): """ Reset collection-related tables with new CFG_SITE_NAME and CFG_SITE_NAME_INTL* read from conf files. """ print ">>> Going to reset CFG_SITE_NAME and CFG_SITE_NAME_INTL..." from invenio.dbquery import run_sql, IntegrityError # reset CFG_SITE_NAME: sitename = conf.get("Invenio", "CFG_SITE_NAME") try: run_sql("""INSERT INTO collection (id, name, dbquery, reclist, restricted) VALUES (1,%s,NULL,NULL,NULL)""", (sitename,)) except IntegrityError: run_sql("""UPDATE collection SET name=%s WHERE id=1""", (sitename,)) # reset CFG_SITE_NAME_INTL: for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): sitename_lang = conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang) try: run_sql("""INSERT INTO collectionname (id_collection, ln, type, value) VALUES (%s,%s,%s,%s)""", (1, lang, 'ln', sitename_lang)) except IntegrityError: run_sql("""UPDATE collectionname SET value=%s WHERE ln=%s AND id_collection=1 AND type='ln'""", (sitename_lang, lang)) print "You may want to restart Apache now." print ">>> CFG_SITE_NAME and CFG_SITE_NAME_INTL* reset successfully." def cli_cmd_reset_siteadminemail(conf): """ Reset user-related tables with new CFG_SITE_ADMIN_EMAIL read from conf files. """ print ">>> Going to reset CFG_SITE_ADMIN_EMAIL..." from invenio.dbquery import run_sql siteadminemail = conf.get("Invenio", "CFG_SITE_ADMIN_EMAIL") run_sql("DELETE FROM user WHERE id=1") run_sql("""INSERT INTO user (id, email, password, note, nickname) VALUES (1, %s, AES_ENCRYPT(email, ''), 1, 'admin')""", (siteadminemail,)) print "You may want to restart Apache now." print ">>> CFG_SITE_ADMIN_EMAIL reset successfully." def cli_cmd_reset_fieldnames(conf): """ Reset I18N field names such as author, title, etc and other I18N ranking method names such as word similarity. Their translations are taken from the PO files. """ print ">>> Going to reset I18N field names..." from invenio.messages import gettext_set_language, language_list_long from invenio.dbquery import run_sql, IntegrityError ## get field id and name list: field_id_name_list = run_sql("SELECT id, name FROM field") ## get rankmethod id and name list: rankmethod_id_name_list = run_sql("SELECT id, name FROM rnkMETHOD") ## update names for every language: for lang, dummy in language_list_long(): _ = gettext_set_language(lang) ## this list is put here in order for PO system to pick names ## suitable for translation field_name_names = {"any field": _("any field"), "title": _("title"), "author": _("author"), "abstract": _("abstract"), "keyword": _("keyword"), "report number": _("report number"), "subject": _("subject"), "reference": _("reference"), "fulltext": _("fulltext"), "collection": _("collection"), "division": _("division"), "year": _("year"), "journal": _("journal"), "experiment": _("experiment"), "record ID": _("record ID")} ## update I18N names for every language: for (field_id, field_name) in field_id_name_list: if field_name_names.has_key(field_name): try: run_sql("""INSERT INTO fieldname (id_field,ln,type,value) VALUES (%s,%s,%s,%s)""", (field_id, lang, 'ln', field_name_names[field_name])) except IntegrityError: run_sql("""UPDATE fieldname SET value=%s WHERE id_field=%s AND ln=%s AND type=%s""", (field_name_names[field_name], field_id, lang, 'ln',)) ## ditto for rank methods: rankmethod_name_names = {"wrd": _("word similarity"), "demo_jif": _("journal impact factor"), "citation": _("times cited"),} for (rankmethod_id, rankmethod_name) in rankmethod_id_name_list: try: run_sql("""INSERT INTO rnkMETHODNAME (id_rnkMETHOD,ln,type,value) VALUES (%s,%s,%s,%s)""", (rankmethod_id, lang, 'ln', rankmethod_name_names[rankmethod_name])) except IntegrityError: run_sql("""UPDATE rnkMETHODNAME SET value=%s WHERE id_rnkMETHOD=%s AND ln=%s AND type=%s""", (rankmethod_name_names[rankmethod_name], rankmethod_id, lang, 'ln',)) print ">>> I18N field names reset successfully." def test_db_connection(): """ Test DB connection, and if fails, advise user how to set it up. Useful to be called during table creation. """ print "Testing DB connection...", from invenio.textutils import wrap_text_in_a_box from invenio.dbquery import run_sql, Error ## first, test connection to the DB server: try: run_sql("SHOW TABLES") except Error, err: from invenio.dbquery import CFG_DATABASE_HOST, CFG_DATABASE_NAME, \ CFG_DATABASE_USER, CFG_DATABASE_PASS print wrap_text_in_a_box("""\ DATABASE CONNECTIVITY ERROR %(errno)d: %(errmsg)s.\n Perhaps you need to set up database and connection rights? If yes, then please login as MySQL admin user and run the following commands now: $ mysql -h %(dbhost)s -u root -p mysql mysql> CREATE DATABASE %(dbname)s DEFAULT CHARACTER SET utf8; mysql> GRANT ALL PRIVILEGES ON %(dbname)s.* TO %(dbuser)s@%(webhost)s IDENTIFIED BY '%(dbpass)s'; mysql> QUIT The values printed above were detected from your configuration. If they are not right, then please edit your invenio.conf file and rerun 'inveniocfg --update-all' first. If the problem is of different nature, then please inspect the above error message and fix the problem before continuing.""" % \ {'errno': err.args[0], 'errmsg': err.args[1], 'dbname': CFG_DATABASE_NAME, 'dbhost': CFG_DATABASE_HOST, 'dbuser': CFG_DATABASE_USER, 'dbpass': CFG_DATABASE_PASS, 'webhost': CFG_DATABASE_HOST == 'localhost' and 'localhost' or os.popen('hostname -f', 'r').read().strip(), }) sys.exit(1) print "ok" ## second, test insert/select of a Unicode string to detect ## possible Python/MySQL/MySQLdb mis-setup: print "Testing Python/MySQL/MySQLdb UTF-8 chain...", try: beta_in_utf8 = "β" # Greek beta in UTF-8 is 0xCEB2 run_sql("CREATE TEMPORARY TABLE test__invenio__utf8 (x char(1), y varbinary(2)) DEFAULT CHARACTER SET utf8") run_sql("INSERT INTO test__invenio__utf8 (x, y) VALUES (%s, %s)", (beta_in_utf8, beta_in_utf8)) res = run_sql("SELECT x,y,HEX(x),HEX(y),LENGTH(x),LENGTH(y),CHAR_LENGTH(x),CHAR_LENGTH(y) FROM test__invenio__utf8") assert res[0] == ('\xce\xb2', '\xce\xb2', 'CEB2', 'CEB2', 2L, 2L, 1L, 2L) run_sql("DROP TEMPORARY TABLE test__invenio__utf8") except Exception, err: print wrap_text_in_a_box("""\ DATABASE RELATED ERROR %s\n A problem was detected with the UTF-8 treatment in the chain between the Python application, the MySQLdb connector, and the MySQL database. You may perhaps have installed older versions of some prerequisite packages?\n Please check the INSTALL file and please fix this problem before continuing.""" % err) sys.exit(1) print "ok" def cli_cmd_create_tables(conf): """Create and fill Invenio DB tables. Useful for the installation process.""" print ">>> Going to create and fill tables..." from invenio.config import CFG_PREFIX test_db_connection() for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabcreate.sql" % (CFG_PREFIX, CFG_PREFIX), "%s/bin/dbexec < %s/lib/sql/invenio/tabfill.sql" % (CFG_PREFIX, CFG_PREFIX)]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) cli_cmd_reset_sitename(conf) cli_cmd_reset_siteadminemail(conf) cli_cmd_reset_fieldnames(conf) for cmd in ["%s/bin/webaccessadmin -u admin -c -a" % CFG_PREFIX]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Tables created and filled successfully." def cli_cmd_drop_tables(conf): """Drop Invenio DB tables. Useful for the uninstallation process.""" print ">>> Going to drop tables..." from invenio.config import CFG_PREFIX from invenio.textutils import wrap_text_in_a_box, wait_for_user wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables!""")) cmd = "%s/bin/dbexec < %s/lib/sql/invenio/tabdrop.sql" % (CFG_PREFIX, CFG_PREFIX) if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Tables dropped successfully." def cli_cmd_create_demo_site(conf): """Create demo site. Useful for testing purposes.""" print ">>> Going to create demo site..." from invenio.config import CFG_PREFIX from invenio.dbquery import run_sql run_sql("TRUNCATE schTASK") for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/democfgdata.sql" % \ (CFG_PREFIX, CFG_PREFIX),]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) cli_cmd_reset_fieldnames(conf) # needed for I18N demo ranking method names for cmd in ["%s/bin/webaccessadmin -u admin -c -r -D" % CFG_PREFIX, "%s/bin/webcoll -u admin" % CFG_PREFIX, "%s/bin/webcoll 1" % CFG_PREFIX,]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Demo site created successfully." def cli_cmd_load_demo_records(conf): """Load demo records. Useful for testing purposes.""" from invenio.config import CFG_PREFIX from invenio.dbquery import run_sql print ">>> Going to load demo records..." run_sql("TRUNCATE schTASK") for cmd in ["%s/bin/bibupload -u admin -i %s/var/tmp/demobibdata.xml" % (CFG_PREFIX, CFG_PREFIX), "%s/bin/bibupload 1" % CFG_PREFIX, "%s/bin/bibindex -u admin" % CFG_PREFIX, "%s/bin/bibindex 2" % CFG_PREFIX, "%s/bin/bibreformat -u admin -o HB" % CFG_PREFIX, "%s/bin/bibreformat 3" % CFG_PREFIX, "%s/bin/bibupload 4" % CFG_PREFIX, "%s/bin/webcoll -u admin" % CFG_PREFIX, "%s/bin/webcoll 5" % CFG_PREFIX, "%s/bin/bibrank -u admin" % CFG_PREFIX, "%s/bin/bibrank 6" % CFG_PREFIX,]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Demo records loaded successfully." def cli_cmd_remove_demo_records(conf): """Remove demo records. Useful when you are finished testing.""" print ">>> Going to remove demo records..." from invenio.config import CFG_PREFIX from invenio.dbquery import run_sql from invenio.textutils import wrap_text_in_a_box, wait_for_user wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your records and documents!""")) if os.path.exists(CFG_PREFIX + os.sep + 'var' + os.sep + 'data'): shutil.rmtree(CFG_PREFIX + os.sep + 'var' + os.sep + 'data') run_sql("TRUNCATE schTASK") for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabbibclean.sql" % (CFG_PREFIX, CFG_PREFIX), "%s/bin/webcoll -u admin" % CFG_PREFIX, "%s/bin/webcoll 1" % CFG_PREFIX,]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Demo records removed successfully." def cli_cmd_drop_demo_site(conf): """Drop demo site completely. Useful when you are finished testing.""" print ">>> Going to drop demo site..." from invenio.textutils import wrap_text_in_a_box, wait_for_user wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your site and documents!""")) cli_cmd_drop_tables(conf) cli_cmd_create_tables(conf) cli_cmd_remove_demo_records(conf) print ">>> Demo site dropped successfully." def cli_cmd_run_unit_tests(conf): """Run unit tests, usually on the working demo site.""" from invenio.testutils import build_and_run_unit_test_suite build_and_run_unit_test_suite() def cli_cmd_run_regression_tests(conf): """Run regression tests, usually on the working demo site.""" from invenio.testutils import build_and_run_regression_test_suite build_and_run_regression_test_suite() def cli_cmd_run_web_tests(conf): """Run web tests in a browser. Requires Firefox with Selenium IDE extension.""" from invenio.testutils import build_and_run_web_test_suite build_and_run_web_test_suite() def cli_cmd_create_apache_conf(conf): """ Create Apache conf files for this site, keeping previous files in a backup copy. """ print ">>> Going to create Apache conf files..." from invenio.textutils import wrap_text_in_a_box apache_conf_dir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'apache' if not os.path.exists(apache_conf_dir): os.mkdir(apache_conf_dir) apache_vhost_file = apache_conf_dir + os.sep + \ 'invenio-apache-vhost.conf' apache_vhost_ssl_file = apache_conf_dir + os.sep + \ 'invenio-apache-vhost-ssl.conf' apache_vhost_body = """\ AddDefaultCharset UTF-8 ServerSignature Off ServerTokens Prod NameVirtualHost *:80 Listen 80 deny from all deny from all ServerName %(servername)s ServerAlias %(serveralias)s ServerAdmin %(serveradmin)s DocumentRoot %(webdir)s Options FollowSymLinks MultiViews AllowOverride None Order allow,deny allow from all ErrorLog %(logdir)s/apache.err LogLevel warn CustomLog %(logdir)s/apache.log combined DirectoryIndex index.en.html index.html SetHandler python-program PythonHandler invenio.webinterface_layout PythonDebug On AddHandler python-program .py PythonHandler mod_python.publisher PythonDebug On """ % {'servername': conf.get('Invenio', 'CFG_SITE_URL').replace("http://", ""), 'serveralias': conf.get('Invenio', 'CFG_SITE_URL').replace("http://", "").split('.')[0], 'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'), 'webdir': conf.get('Invenio', 'CFG_WEBDIR'), 'logdir': conf.get('Invenio', 'CFG_LOGDIR'), } apache_vhost_ssl_body = """\ ServerSignature Off ServerTokens Prod Listen 443 NameVirtualHost *:443 #SSLCertificateFile /etc/apache2/ssl/apache.pem SSLCertificateFile /etc/apache2/ssl/server.crt SSLCertificateKeyFile /etc/apache2/ssl/server.key deny from all deny from all ServerName %(servername)s ServerAlias %(serveralias)s ServerAdmin %(serveradmin)s SSLEngine on DocumentRoot %(webdir)s Options FollowSymLinks MultiViews AllowOverride None Order allow,deny allow from all ErrorLog %(logdir)s/apache-ssl.err LogLevel warn CustomLog %(logdir)s/apache-ssl.log combined DirectoryIndex index.en.html index.html SetHandler python-program PythonHandler invenio.webinterface_layout PythonDebug On AddHandler python-program .py PythonHandler mod_python.publisher PythonDebug On """ % {'servername': conf.get('Invenio', 'CFG_SITE_SECURE_URL').replace("https://", ""), 'serveralias': conf.get('Invenio', 'CFG_SITE_SECURE_URL').replace("https://", "").split('.')[0], 'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'), 'webdir': conf.get('Invenio', 'CFG_WEBDIR'), 'logdir': conf.get('Invenio', 'CFG_LOGDIR'), } # write HTTP vhost snippet: if os.path.exists(apache_vhost_file): shutil.copy(apache_vhost_file, apache_vhost_file + '.OLD') fdesc = open(apache_vhost_file, 'w') fdesc.write(apache_vhost_body) fdesc.close() print "Created file", apache_vhost_file # write HTTPS vhost snippet: if conf.get('Invenio', 'CFG_SITE_SECURE_URL') != \ conf.get('Invenio', 'CFG_SITE_URL'): if os.path.exists(apache_vhost_ssl_file): shutil.copy(apache_vhost_ssl_file, apache_vhost_ssl_file + '.OLD') fdesc = open(apache_vhost_ssl_file, 'w') fdesc.write(apache_vhost_ssl_body) fdesc.close() print "Created file", apache_vhost_ssl_file print wrap_text_in_a_box("""\ Apache virtual host configurations for your site have been created. You can check created files and put the following include statements in your httpd.conf:\n Include %s Include %s """ % (apache_vhost_file, apache_vhost_ssl_file)) print ">>> Apache conf files created." def cli_cmd_get(conf, varname): """ Return value of VARNAME read from CONF files. Useful for third-party programs to access values of conf options such as CFG_PREFIX. Return None if VARNAME is not found. """ # do not pay attention to upper/lower case: varname = varname.lower() # do not pay attention to section names yet: all_options = {} for section in conf.sections(): for option in conf.options(section): all_options[option] = conf.get(section, option) return all_options.get(varname, None) def cli_cmd_list(conf): """ Print a list of all conf options and values from CONF. """ for section in conf.sections(): for option in conf.options(section): print option, '=', conf.get(section, option) def _grep_version_from_executable(path_to_exec, version_regexp): """ Try to detect a program version by digging into its binary PATH_TO_EXEC and looking for VERSION_REGEXP. Return program version as a string. Return empty string if not succeeded. """ from invenio.shellutils import run_shell_command exec_version = "" if os.path.exists(path_to_exec): dummy1, cmd2_out, dummy2 = run_shell_command("strings %s | grep %s" % \ (path_to_exec, version_regexp)) if cmd2_out: for cmd2_out_line in cmd2_out.split("\n"): if len(cmd2_out_line) > len(exec_version): # the longest the better exec_version = cmd2_out_line return exec_version def detect_apache_version(): """ Try to detect Apache version by localizing httpd or apache executables and grepping inside binaries. Return list of all found Apache versions and paths. (For a given executable, the returned format is 'apache_version [apache_path]'.) Return empty list if no success. """ from invenio.shellutils import run_shell_command out = [] dummy1, cmd_out, dummy2 = run_shell_command("locate bin/httpd bin/apache") for apache in cmd_out.split("\n"): apache_version = _grep_version_from_executable(apache, '^Apache\/') if apache_version: out.append("%s [%s]" % (apache_version, apache)) return out def detect_modpython_version(): """ Try to detect mod_python version, either from mod_python import or from grepping inside mod_python.so, like Apache. Return list of all found mod_python versions and paths. Return empty list if no success. """ out = [] try: from mod_python import version out.append(version) except ImportError: # try to detect via looking at mod_python.so: from invenio.shellutils import run_shell_command version = "" dummy1, cmd_out, dummy2 = run_shell_command("locate /mod_python.so") for modpython in cmd_out.split("\n"): modpython_version = _grep_version_from_executable(modpython, '^mod_python\/') if modpython_version: out.append("%s [%s]" % (modpython_version, modpython)) return out def cli_cmd_detect_system_details(conf): """ Detect and print system details such as Apache/Python/MySQL versions etc. Useful for debugging problems on various OS. """ import MySQLdb print ">>> Going to detect system details..." print "* Hostname: " + socket.gethostname() print "* Invenio version: " + conf.get("Invenio", "CFG_VERSION") print "* Python version: " + sys.version.replace("\n", " ") print "* Apache version: " + ";\n ".join(detect_apache_version()) print "* mod_python version: " + ";\n ".join(detect_modpython_version()) print "* MySQLdb version: " + MySQLdb.__version__ try: from invenio.dbquery import run_sql print "* MySQL version:" for key, val in run_sql("SHOW VARIABLES LIKE 'version%'") + \ run_sql("SHOW VARIABLES LIKE 'charact%'") + \ run_sql("SHOW VARIABLES LIKE 'collat%'"): if False: print " - %s: %s" % (key, val) elif key in ['version', 'character_set_client', 'character_set_connection', 'character_set_database', 'character_set_results', 'character_set_server', 'character_set_system', 'collation_connection', 'collation_database', 'collation_server']: print " - %s: %s" % (key, val) except ImportError: print "* ERROR: cannot import dbquery" print ">>> System details detected successfully." def main(): """Main entry point.""" conf = ConfigParser() if '--help' in sys.argv or \ '-h' in sys.argv: print_usage() elif '--version' in sys.argv or \ '-V' in sys.argv: print_version() else: confdir = None if '--conf-dir' in sys.argv: try: confdir = sys.argv[sys.argv.index('--conf-dir') + 1] except IndexError: pass # missing --conf-dir argument value if not os.path.exists(confdir): print "ERROR: bad or missing --conf-dir option value." sys.exit(1) else: ## try to detect path to conf dir (relative to this bin dir): confdir = re.sub(r'/bin$', '/etc', sys.path[0]) ## read conf files: for conffile in [confdir + os.sep + 'invenio.conf', confdir + os.sep + 'invenio-autotools.conf', confdir + os.sep + 'invenio-local.conf',]: if os.path.exists(conffile): conf.read(conffile) else: if not conffile.endswith("invenio-local.conf"): # invenio-local.conf is optional, otherwise stop print "ERROR: Badly guessed conf file location", conffile print "(Please use --conf-dir option.)" sys.exit(1) ## decide what to do: done = False for opt_idx in range(0, len(sys.argv)): opt = sys.argv[opt_idx] if opt == '--conf-dir': # already treated before, so skip silently: pass elif opt == '--get': try: varname = sys.argv[opt_idx + 1] except IndexError: print "ERROR: bad or missing --get option value." sys.exit(1) if varname.startswith('-'): print "ERROR: bad or missing --get option value." sys.exit(1) varvalue = cli_cmd_get(conf, varname) if varvalue is not None: print varvalue else: sys.exit(1) done = True elif opt == '--list': cli_cmd_list(conf) done = True elif opt == '--detect-system-details': cli_cmd_detect_system_details(conf) done = True elif opt == '--create-tables': cli_cmd_create_tables(conf) done = True elif opt == '--drop-tables': cli_cmd_drop_tables(conf) done = True elif opt == '--create-demo-site': cli_cmd_create_demo_site(conf) done = True elif opt == '--load-demo-records': cli_cmd_load_demo_records(conf) done = True elif opt == '--remove-demo-records': cli_cmd_remove_demo_records(conf) done = True elif opt == '--drop-demo-site': cli_cmd_drop_demo_site(conf) done = True elif opt == '--run-unit-tests': cli_cmd_run_unit_tests(conf) done = True elif opt == '--run-regression-tests': cli_cmd_run_regression_tests(conf) done = True elif opt == '--run-web-tests': cli_cmd_run_web_tests(conf) done = True elif opt == '--update-all': cli_cmd_update_config_py(conf) cli_cmd_update_dbquery_py(conf) cli_cmd_update_dbexec(conf) cli_cmd_update_bibconvert_tpl(conf) cli_cmd_update_web_tests(conf) done = True elif opt == '--update-config-py': cli_cmd_update_config_py(conf) done = True elif opt == '--update-dbquery-py': cli_cmd_update_dbquery_py(conf) done = True elif opt == '--update-dbexec': cli_cmd_update_dbexec(conf) done = True elif opt == '--update-bibconvert-tpl': cli_cmd_update_bibconvert_tpl(conf) done = True elif opt == '--update-web-tests': cli_cmd_update_web_tests(conf) done = True elif opt == '--reset-all': cli_cmd_reset_sitename(conf) cli_cmd_reset_siteadminemail(conf) cli_cmd_reset_fieldnames(conf) done = True elif opt == '--reset-sitename': cli_cmd_reset_sitename(conf) done = True elif opt == '--reset-siteadminemail': cli_cmd_reset_siteadminemail(conf) done = True elif opt == '--reset-fieldnames': cli_cmd_reset_fieldnames(conf) done = True elif opt == '--create-apache-conf': cli_cmd_create_apache_conf(conf) done = True elif opt.startswith("-") and opt != '--yes-i-know': print "ERROR: unknown option", opt sys.exit(1) if not done: print """ERROR: Please specify a command. Please see '--help'.""" sys.exit(1) if __name__ == '__main__': main() diff --git a/modules/websearch/lib/websearch_templates.py b/modules/websearch/lib/websearch_templates.py index 3ce26d148..80fb44729 100644 --- a/modules/websearch/lib/websearch_templates.py +++ b/modules/websearch/lib/websearch_templates.py @@ -1,3469 +1,3469 @@ # -*- codinge: utf-8 -*- ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable-msg=C0301 __revision__ = "$Id$" import time import cgi import gettext import string import locale from urllib import quote, urlencode from invenio.config import \ CFG_WEBSEARCH_SUPERSIMPLESEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, \ CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD, \ CFG_WEBSEARCH_USE_ALEPH_SYSNOS, \ CFG_WEBSEARCH_SPLIT_BY_COLLECTION, \ CFG_BIBRANK_SHOW_READING_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_STATS, \ CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS, \ CFG_BIBRANK_SHOW_CITATION_LINKS, \ CFG_BIBRANK_SHOW_CITATION_STATS, \ CFG_BIBRANK_SHOW_CITATION_GRAPHS, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_VERSION, \ CFG_SITE_URL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_INSPIRE_SITE, \ - CFG_DEFAULT_SEARCH_INTERFACE, \ - CFG_ENABLED_SEARCH_INTERFACES + CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ + CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES from invenio.dbquery import run_sql from invenio.messages import gettext_set_language #from invenio.search_engine_config import CFG_EXPERIMENTAL_FEATURES from invenio.urlutils import make_canonical_urlargd, drop_default_urlargd, create_html_link, create_url from invenio.htmlutils import nmtoken_from_string from invenio.webinterface_handler import wash_urlargd from invenio.bibrank_citation_searcher import get_cited_by_count from invenio.intbitset import intbitset from invenio.websearch_external_collections import external_collection_get_state def get_fieldvalues(recID, tag): """Return list of field values for field TAG inside record RECID. FIXME: should be imported commonly for search_engine too.""" out = [] if tag == "001___": # we have asked for recID that is not stored in bibXXx tables out.append(str(recID)) else: # we are going to look inside bibXXx tables digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag LIKE '%s'" \ "ORDER BY bibx.field_number, bx.tag ASC" % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out class Template: # This dictionary maps CDS Invenio language code to locale codes (ISO 639) tmpl_localemap = { 'bg': 'bg_BG', 'ca': 'ca_ES', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES', 'pt': 'pt_BR', 'fr': 'fr_FR', 'it': 'it_IT', 'ru': 'ru_RU', 'sk': 'sk_SK', 'cs': 'cs_CZ', 'no': 'no_NO', 'sv': 'sv_SE', 'uk': 'uk_UA', 'ja': 'ja_JA', 'pl': 'pl_PL', 'hr': 'hr_HR', 'zh_CN': 'zh_CN', 'zh_TW': 'zh_TW', 'hu': 'hu_HU', } tmpl_default_locale = "en_US" # which locale to use by default, useful in case of failure # Type of the allowed parameters for the web interface for search results search_results_default_urlargd = { 'cc': (str, CFG_SITE_NAME), 'c': (list, []), 'p': (str, ""), 'f': (str, ""), 'rg': (int, 10), 'sf': (str, ""), 'so': (str, "d"), 'sp': (str, ""), 'rm': (str, ""), 'of': (str, "hb"), 'ot': (list, []), - 'as': (int, CFG_DEFAULT_SEARCH_INTERFACE), + 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'p1': (str, ""), 'f1': (str, ""), 'm1': (str, ""), 'op1':(str, ""), 'p2': (str, ""), 'f2': (str, ""), 'm2': (str, ""), 'op2':(str, ""), 'p3': (str, ""), 'f3': (str, ""), 'm3': (str, ""), 'sc': (int, 0), 'jrec': (int, 0), 'recid': (int, -1), 'recidb': (int, -1), 'sysno': (str, ""), 'id': (int, -1), 'idb': (int, -1), 'sysnb': (str, ""), 'action': (str, "search"), 'action_search': (str, ""), 'action_browse': (str, ""), 'd1': (str, ""), 'd1y': (int, 0), 'd1m': (int, 0), 'd1d': (int, 0), 'd2': (str, ""), 'd2y': (int, 0), 'd2m': (int, 0), 'd2d': (int, 0), 'dt': (str, ""), 'ap': (int, 1), 'verbose': (int, 0), 'ec': (list, []), } # ...and for search interfaces search_interface_default_urlargd = { - 'as': (int, CFG_DEFAULT_SEARCH_INTERFACE), + 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0)} # ...and for RSS feeds rss_default_urlargd = {'c' : (list, []), 'cc' : (str, ""), 'p' : (str, ""), 'f' : (str, ""), 'p1' : (str, ""), 'f1' : (str, ""), 'm1' : (str, ""), 'op1': (str, ""), 'p2' : (str, ""), 'f2' : (str, ""), 'm2' : (str, ""), 'op2': (str, ""), 'p3' : (str, ""), 'f3' : (str, ""), 'm3' : (str, "")} tmpl_openurl_accepted_args = { 'id' : (list, []), 'genre' : (str, ''), 'aulast' : (str, ''), 'aufirst' : (str, ''), 'auinit' : (str, ''), 'auinit1' : (str, ''), 'auinitm' : (str, ''), 'issn' : (str, ''), 'eissn' : (str, ''), 'coden' : (str, ''), 'isbn' : (str, ''), 'sici' : (str, ''), 'bici' : (str, ''), 'title' : (str, ''), 'stitle' : (str, ''), 'atitle' : (str, ''), 'volume' : (str, ''), 'part' : (str, ''), 'issue' : (str, ''), 'spage' : (str, ''), 'epage' : (str, ''), 'pages' : (str, ''), 'artnum' : (str, ''), 'date' : (str, ''), 'ssn' : (str, ''), 'quarter' : (str, ''), 'url_ver' : (str, ''), 'ctx_ver' : (str, ''), 'rft_val_fmt' : (str, ''), 'rft_id' : (list, []), 'rft.atitle' : (str, ''), 'rft.title' : (str, ''), 'rft.jtitle' : (str, ''), 'rft.stitle' : (str, ''), 'rft.date' : (str, ''), 'rft.volume' : (str, ''), 'rft.issue' : (str, ''), 'rft.spage' : (str, ''), 'rft.epage' : (str, ''), 'rft.pages' : (str, ''), 'rft.artnumber' : (str, ''), 'rft.issn' : (str, ''), 'rft.eissn' : (str, ''), 'rft.aulast' : (str, ''), 'rft.aufirst' : (str, ''), 'rft.auinit' : (str, ''), 'rft.auinit1' : (str, ''), 'rft.auinitm' : (str, ''), 'rft.ausuffix' : (str, ''), 'rft.au' : (list, []), 'rft.aucorp' : (str, ''), 'rft.isbn' : (str, ''), 'rft.coden' : (str, ''), 'rft.sici' : (str, ''), 'rft.genre' : (str, 'unknown'), 'rft.chron' : (str, ''), 'rft.ssn' : (str, ''), 'rft.quarter' : (int, ''), 'rft.part' : (str, ''), 'rft.btitle' : (str, ''), 'rft.isbn' : (str, ''), 'rft.atitle' : (str, ''), 'rft.place' : (str, ''), 'rft.pub' : (str, ''), 'rft.edition' : (str, ''), 'rft.tpages' : (str, ''), 'rft.series' : (str, ''), } def tmpl_openurl2invenio(self, openurl_data): """ Return an Invenio url corresponding to a search with the data included in the openurl form map. """ from invenio.search_engine import perform_request_search doi = '' pmid = '' bibcode = '' oai = '' issn = '' isbn = '' for elem in openurl_data['id']: if elem.startswith('doi:'): doi = elem[len('doi:'):] elif elem.startswith('pmid:'): pmid = elem[len('pmid:'):] elif elem.startswith('bibcode:'): bibcode = elem[len('bibcode:'):] elif elem.startswith('oai:'): oai = elem[len('oai:'):] for elem in openurl_data['rft_id']: if elem.startswith('info:doi/'): doi = elem[len('info:doi/'):] elif elem.startswith('info:pmid/'): pmid = elem[len('info:pmid/'):] elif elem.startswith('info:bibcode/'): bibcode = elem[len('info:bibcode/'):] elif elem.startswith('info:oai/'): oai = elem[len('info:oai/')] elif elem.startswith('urn:ISBN:'): isbn = elem[len('urn:ISBN:'):] elif elem.startswith('urn:ISSN:'): issn = elem[len('urn:ISSN:'):] ## Building author query aulast = openurl_data['rft.aulast'] or openurl_data['aulast'] aufirst = openurl_data['rft.aufirst'] or openurl_data['aufirst'] auinit = openurl_data['rft.auinit'] or \ openurl_data['auinit'] or \ openurl_data['rft.auinit1'] + ' ' + openurl_data['rft.auinitm'] or \ openurl_data['auinit1'] + ' ' + openurl_data['auinitm'] or aufirst[:1] auinit = auinit.upper() if aulast and aufirst: author_query = 'author:"%s, %s" or author:"%s, %s"' % (aulast, aufirst, aulast, auinit) elif aulast and auinit: author_query = 'author:"%s, %s"' % (aulast, auinit) else: author_query = '' ## Building title query title = openurl_data['rft.atitle'] or \ openurl_data['atitle'] or \ openurl_data['rft.btitle'] or \ openurl_data['rft.title'] or \ openurl_data['title'] if title: title_query = 'title:"%s"' % title else: title_query = '' ## Building journal query jtitle = openurl_data['rft.stitle'] or \ openurl_data['stitle'] or \ openurl_data['rft.jtitle'] or \ openurl_data['title'] if jtitle: journal_query = 'journal:"%s"' % jtitle else: journal_query = '' ## Building isbn query isbn = isbn or openurl_data['rft.isbn'] or \ openurl_data['isbn'] isbn = isbn.replace(' ', '').replace('-', '') if isbn: isbn_query = 'isbn:"%s"' % isbn else: isbn_query = '' ## Building issn query issn = issn or openurl_data['rft.eissn'] or \ openurl_data['eissn'] or \ openurl_data['rft.issn'] or \ openurl_data['issn'] if issn: issn_query = 'issn:"%s"' % issn else: issn_query = '' ## Building coden query coden = openurl_data['rft.coden'] or openurl_data['coden'] if coden: coden_query = 'coden:"%s"' % coden else: coden_query = '' ## Building doi query if False: #doi: #FIXME Temporaly disabled until doi field is properly setup doi_query = 'doi:"%s"' % doi else: doi_query = '' ## Trying possible searches if doi_query: if perform_request_search(p=doi_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : doi_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if isbn_query: if perform_request_search(p=isbn_query): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : isbn_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if coden_query: if perform_request_search(p=coden_query): return '%s/search?' % (CFG_SITE_URL, urlencode({ 'p' : coden_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if author_query and title_query: if perform_request_search(p='%s and %s' % (title_query, author_query)): return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : '%s and %s' % (title_query, author_query), 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) if title_query: result = len(perform_request_search(p=title_query)) if result == 1: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hd'})) elif result > 1: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'})) if title: return '%s/search?%s' % (CFG_SITE_URL, urlencode({ 'p' : title, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'})) ## Nothing worked, let's return a search that the user can improve if author_query and title_query: return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : '%s and %s' % (title_query, author_query), 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) elif title_query: return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : title_query, 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) else: ## Mmh. Too few information provided. return '%s/search%s' % (CFG_SITE_URL, make_canonical_urlargd({ 'p' : 'recid:-1', 'sc' : CFG_WEBSEARCH_SPLIT_BY_COLLECTION, 'of' : 'hb'}, {})) def build_search_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search url. 'known_parameters' is the list of query parameters you inherit from your current query. You can then pass keyword arguments to modify this query. build_search_url(known_parameters, of="xm") The generated URL is absolute. """ parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Now, we only have the arguments which have _not_ their default value parameters = drop_default_urlargd(parameters, self.search_results_default_urlargd) # Asking for a recid? Return a /record/ URL if 'recid' in parameters: target = "%s/record/%d" % (CFG_SITE_URL, parameters['recid']) del parameters['recid'] target += make_canonical_urlargd(parameters, self.search_results_default_urlargd) return target return "%s/search%s" % (CFG_SITE_URL, make_canonical_urlargd(parameters, self.search_results_default_urlargd)) def build_search_interface_url(self, known_parameters={}, **kargs): """ Helper for generating a canonical search interface URL.""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) c = parameters['c'] del parameters['c'] # Now, we only have the arguments which have _not_ their default value if c and c != CFG_SITE_NAME: base = CFG_SITE_URL + '/collection/' + quote(c) else: base = CFG_SITE_URL return create_url(base, drop_default_urlargd(parameters, self.search_results_default_urlargd)) def build_rss_url(self, known_parameters, **kargs): """Helper for generating a canonical RSS URL""" parameters = {} parameters.update(known_parameters) parameters.update(kargs) # Keep only interesting parameters argd = wash_urlargd(parameters, self.rss_default_urlargd) if argd: # Handle 'c' differently since it is a list c = argd.get('c', []) del argd['c'] # Create query, and drop empty params args = make_canonical_urlargd(argd, self.rss_default_urlargd) if c != []: # Add collections c = [quote(coll) for coll in c] if args == '': args += '?' else: args += '&' args += 'c=' + '&c='.join(c) return CFG_SITE_URL + '/rss' + args def tmpl_record_page_header_content(self, req, recid, ln): """ Provide extra information in the header of /record pages """ _ = gettext_set_language(ln) title = get_fieldvalues(recid, "245__a") if title: title = _("Record") + '#%d: %s' %(recid, cgi.escape(title[0])) else: title = _("Record") + ' #%d' % recid keywords = ', '.join(get_fieldvalues(recid, "6531_a")) description = ' '.join(get_fieldvalues(recid, "520__a")) description += "\n" description += '; '.join(get_fieldvalues(recid, "100__a") + get_fieldvalues(recid, "700__a")) return [cgi.escape(x, True) for x in (title, description, keywords)] def tmpl_navtrail_links(self, as, ln, dads): """ Creates the navigation bar at top of each search page (*Home > Root collection > subcollection > ...*) Parameters: - 'as' *int* - Should we display an advanced search box? - 'ln' *string* - The language to display - 'separator' *string* - The separator between two consecutive collections - 'dads' *list* - A list of parent links, eachone being a dictionary of ('name', 'longname') """ out = [] for url, name in dads: out.append(create_html_link(self.build_search_interface_url(c=url, as=as, ln=ln), {}, cgi.escape(name), {'class': 'navtrail'})) return ' > '.join(out) def tmpl_webcoll_body(self, ln, collection, te_portalbox, searchfor, np_portalbox, narrowsearch, focuson, instantbrowse, ne_portalbox): """ Creates the body of the main search page. Parameters: - 'ln' *string* - language of the page being generated - 'collection' - collection id of the page being generated - 'te_portalbox' *string* - The HTML code for the portalbox on top of search - 'searchfor' *string* - The HTML code for the search for box - 'np_portalbox' *string* - The HTML code for the portalbox on bottom of search - 'narrowsearch' *string* - The HTML code for the search categories (left bottom of page) - 'focuson' *string* - The HTML code for the "focuson" categories (right bottom of page) - 'ne_portalbox' *string* - The HTML code for the bottom of the page """ if not narrowsearch: narrowsearch = instantbrowse body = '''
%(searchfor)s %(np_portalbox)s ''' % { 'siteurl' : CFG_SITE_URL, 'searchfor' : searchfor, 'np_portalbox' : np_portalbox, 'narrowsearch' : narrowsearch } if focuson: body += """""" body += """
%(narrowsearch)s""" + focuson + """
%(ne_portalbox)s
""" % {'ne_portalbox' : ne_portalbox} return body def tmpl_portalbox(self, title, body): """Creates portalboxes based on the parameters Parameters: - 'title' *string* - The title of the box - 'body' *string* - The HTML code for the body of the box """ out = """
%(title)s
%(body)s
""" % {'title' : cgi.escape(title), 'body' : body} return out def tmpl_searchfor_super_simple(self, ln, collection_id, collection_name, record_count, example_search_queries): # EXPERIMENTAL """Produces super simple *Search for* box for the current collection. Parameters: - 'ln' *string* - *str* The language to display - 'collection_id' - *str* The collection id - 'collection_name' - *str* The collection name in current language - 'example_search_queries' - *list* List of search queries given as example for this collection """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, - as=max(CFG_ENABLED_SEARCH_INTERFACES), + as=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # Build example of queries for this collection example_search_queries_links = [create_html_link(self.build_search_url(p=example_query, ln=ln, as=-1, c=collection_id), {}, cgi.escape(example_query), {'class': 'examplequery'}) \ for example_query in example_search_queries] example_query_html = '' if len(example_search_queries) > 0: example_query_link = example_search_queries_links[0] # offers more examples if possible more = '' if len(example_search_queries_links) > 1: more = ''' ''' % {'more_example_queries': '
'.join(example_search_queries_links[1:]), 'show_less':_("less"), 'show_more':_("more")} example_query_html += '''

%(example)s%(more)s

''' % {'example': _("Example: %(x_sample_search_query)s") % \ {'x_sample_search_query': example_query_link}, 'more': more} # display options to search in current collection or everywhere search_in = '' if collection_name != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in += ''' ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': collection_name}, 'collection_id': collection_id, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} # print commentary start: out += '''

%(example_query_html)s
%(msg_search_tips)s
%(asearch)s
%(search_in)s ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_SUPERSIMPLESEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips'), 'search_in': search_in, 'example_query_html': example_query_html} return out def tmpl_searchfor_simple(self, ln, collection_id, collection_name, record_count, middle_option): """Produces simple *Search for* box for the current collection. Parameters: - 'ln' *string* - *str* The language to display - 'collection_id' - *str* The collection id - 'collection_name' - *str* The collection name in current language - 'record_count' - *str* Number of records in this collection - 'middle_option' *string* - HTML code for the options (any field, specific fields ...) """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for:") % \ self.tmpl_nbrecs_info(record_count, "", "") asearchurl = self.build_search_interface_url(c=collection_id, - as=max(CFG_ENABLED_SEARCH_INTERFACES), + as=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) # print commentary start: out += ''' ''' % {'ln' : ln, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'asearch' : create_html_link(asearchurl, {}, _('Advanced Search')), 'header' : header, 'middle_option' : middle_option, 'msg_search' : _('Search'), 'msg_browse' : _('Browse'), 'msg_search_tips' : _('Search Tips')} return out def tmpl_searchfor_advanced(self, ln, # current language collection_id, collection_name, record_count, middle_option_1, middle_option_2, middle_option_3, searchoptions, sortoptions, rankoptions, displayoptions, formatoptions ): """ Produces advanced *Search for* box for the current collection. Parameters: - 'ln' *string* - The language to display - 'middle_option_1' *string* - HTML code for the first row of options (any field, specific fields ...) - 'middle_option_2' *string* - HTML code for the second row of options (any field, specific fields ...) - 'middle_option_3' *string* - HTML code for the third row of options (any field, specific fields ...) - 'searchoptions' *string* - HTML code for the search options - 'sortoptions' *string* - HTML code for the sort options - 'rankoptions' *string* - HTML code for the rank options - 'displayoptions' *string* - HTML code for the display options - 'formatoptions' *string* - HTML code for the format options """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' argd = drop_default_urlargd({'ln': ln, 'as': 1, 'cc': collection_id, 'sc': CFG_WEBSEARCH_SPLIT_BY_COLLECTION}, self.search_results_default_urlargd) # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) header = _("Search %s records for") % \ self.tmpl_nbrecs_info(record_count, "", "") header += ':' - ssearchurl = self.build_search_interface_url(c=collection_id, as=min(CFG_ENABLED_SEARCH_INTERFACES), ln=ln) + ssearchurl = self.build_search_interface_url(c=collection_id, as=min(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), ln=ln) out += ''' ''' % {'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'siteurl' : CFG_SITE_URL, 'ssearch' : create_html_link(ssearchurl, {}, _("Simple Search")), 'header' : header, 'matchbox_m1' : self.tmpl_matchtype_box('m1', ln=ln), 'middle_option_1' : middle_option_1, 'andornot_op1' : self.tmpl_andornot_box('op1', ln=ln), 'matchbox_m2' : self.tmpl_matchtype_box('m2', ln=ln), 'middle_option_2' : middle_option_2, 'andornot_op2' : self.tmpl_andornot_box('op2', ln=ln), 'matchbox_m3' : self.tmpl_matchtype_box('m3', ln=ln), 'middle_option_3' : middle_option_3, 'msg_search' : _("Search"), 'msg_browse' : _("Browse"), 'msg_search_tips' : _("Search Tips")} if (searchoptions): out += """""" % { 'searchheader' : _("Search options:"), 'searchoptions' : searchoptions } out += """ """ % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(ln=ln), 'date_added' : self.tmpl_inputdate("d1", ln=ln), 'date_until' : self.tmpl_inputdate("d2", ln=ln), 'msg_sort' : _("Sort by:"), 'msg_display' : _("Display results:"), 'msg_format' : _("Output format:"), 'sortoptions' : sortoptions, 'rankoptions' : rankoptions, 'displayoptions' : displayoptions, 'formatoptions' : formatoptions } return out def tmpl_matchtype_box(self, name='m', value='', ln='en'): """Returns HTML code for the 'match type' selection box. Parameters: - 'name' *string* - The name of the produced select - 'value' *string* - The selected value (if any value is already selected) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) out = """ """ % {'name' : name, 'sela' : self.tmpl_is_selected('a', value), 'opta' : _("All of the words:"), 'selo' : self.tmpl_is_selected('o', value), 'opto' : _("Any of the words:"), 'sele' : self.tmpl_is_selected('e', value), 'opte' : _("Exact phrase:"), 'selp' : self.tmpl_is_selected('p', value), 'optp' : _("Partial phrase:"), 'selr' : self.tmpl_is_selected('r', value), 'optr' : _("Regular expression:") } return out def tmpl_is_selected(self, var, fld): """ Checks if *var* and *fld* are equal, and if yes, returns ' selected="selected"'. Useful for select boxes. Parameters: - 'var' *string* - First value to compare - 'fld' *string* - Second value to compare """ if var == fld: return ' selected="selected"' else: return "" def tmpl_andornot_box(self, name='op', value='', ln='en'): """ Returns HTML code for the AND/OR/NOT selection box. Parameters: - 'name' *string* - The name of the produced select - 'value' *string* - The selected value (if any value is already selected) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) out = """ """ % {'name' : name, 'sela' : self.tmpl_is_selected('a', value), 'opta' : _("AND"), 'selo' : self.tmpl_is_selected('o', value), 'opto' : _("OR"), 'seln' : self.tmpl_is_selected('n', value), 'optn' : _("AND NOT") } return out def tmpl_inputdate(self, name, ln, sy = 0, sm = 0, sd = 0): """ Produces *From Date*, *Until Date* kind of selection box. Suitable for search options. Parameters: - 'name' *string* - The base name of the produced selects - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) box = """ """ # month box += """ """ # year box += """ """ return box def tmpl_inputdatetype(self, dt='', ln=CFG_SITE_LANG): """ Produces input date type selection box to choose added-or-modified date search option. Parameters: - 'dt' *string - date type (c=created, m=modified) - 'ln' *string* - the language to display """ # load the right message language _ = gettext_set_language(ln) box = """ """ % { 'added': _("Added since:"), 'modified': _("Modified since:"), 'sel': self.tmpl_is_selected(dt, 'm'), } return box def tmpl_narrowsearch(self, as, ln, type, father, has_grandchildren, sons, display_grandsons, grandsons): """ Creates list of collection descendants of type *type* under title *title*. If as==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes. Parameters: - 'as' *bool* - Should we display an advanced search box? - 'ln' *string* - The language to display - 'type' *string* - The type of the produced box (virtual collections or normal collections) - 'father' *collection* - The current collection - 'has_grandchildren' *bool* - If the current collection has grand children - 'sons' *list* - The list of the sub-collections (first level) - 'display_grandsons' *bool* - If the grand children collections should be displayed (2 level deep display) - 'grandsons' *list* - The list of sub-collections (second level) """ # load the right message language _ = gettext_set_language(ln) title = {'r': _("Narrow by collection:"), 'v': _("Focus on:")}[type] if has_grandchildren: style_prolog = "" style_epilog = "" else: style_prolog = "" style_epilog = "" out = """""" % {'title' : title, 'narrowsearchbox': {'r': 'narrowsearchbox', 'v': 'focusonsearchbox'}[type]} # iterate through sons: i = 0 for son in sons: out += """""" % {'name' : cgi.escape(son.name) } else: out += """""" % {'name' : cgi.escape(son.name) } else: out += '' out += """""" i += 1 out += "
%(title)s
""" % \ { 'narrowsearchbox': {'r': 'narrowsearchbox', 'v': 'focusonsearchbox'}[type]} if type == 'r': if son.restricted_p() and son.restricted_p() != father.restricted_p(): out += """%(link)s%(recs)s """ % { 'link': create_html_link(self.build_search_interface_url(c=son.name, ln=ln, as=as), {}, style_prolog + cgi.escape(son.get_name(ln)) + style_epilog), 'recs' : self.tmpl_nbrecs_info(son.nbrecs, ln=ln)} if son.restricted_p(): out += """ [%(msg)s] """ % { 'msg' : _("restricted") } if display_grandsons and len(grandsons[i]): # iterate trough grandsons: out += """
""" for grandson in grandsons[i]: out += """ %(link)s%(nbrec)s """ % { 'link': create_html_link(self.build_search_interface_url(c=grandson.name, ln=ln, as=as), {}, cgi.escape(grandson.get_name(ln))), 'nbrec' : self.tmpl_nbrecs_info(grandson.nbrecs, ln=ln)} out += """
" return out def tmpl_searchalso(self, ln, engines_list, collection_id): _ = gettext_set_language(ln) box_name = _("Search also:") html = """
""" % locals() for engine in engines_list: internal_name = engine.name name = _(internal_name) base_url = engine.base_url if external_collection_get_state(engine, collection_id) == 3: checked = ' checked="checked"' else: checked = '' html += """""" % \ { 'checked': checked, 'base_url': base_url, 'internal_name': internal_name, 'name': cgi.escape(name), 'id': "extSearch" + nmtoken_from_string(name), 'siteurl': CFG_SITE_URL,} html += """
%(box_name)s
%(name)s
""" return html def tmpl_nbrecs_info(self, number, prolog=None, epilog=None, ln=CFG_SITE_LANG): """ Return information on the number of records. Parameters: - 'number' *string* - The number of records - 'prolog' *string* (optional) - An HTML code to prefix the number (if **None**, will be '(') - 'epilog' *string* (optional) - An HTML code to append to the number (if **None**, will be ')') """ if number is None: number = 0 if prolog is None: prolog = ''' (''' if epilog is None: epilog = ''')''' return prolog + self.tmpl_nice_number(number, ln) + epilog def tmpl_box_restricted_content(self, ln): """ Displays a box containing a *restricted content* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("The contents of this collection is restricted.") def tmpl_box_no_records(self, ln): """ Displays a box containing a *no content* message Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) return _("This collection does not contain any document yet.") def tmpl_instant_browse(self, as, ln, recids, more_link = None): """ Formats a list of records (given in the recids list) from the database. Parameters: - 'as' *int* - Advanced Search interface or not (0 or 1) - 'ln' *string* - The language to display - 'recids' *list* - the list of records from the database - 'more_link' *string* - the "More..." link for the record. If not given, will not be displayed """ # load the right message language _ = gettext_set_language(ln) body = '''''' for recid in recids: body += ''' ''' % {'date': recid['date'], 'body': recid['body'] } body += "
%(date)s %(body)s
" if more_link: body += '
' + \ create_html_link(more_link, {}, '[>> %s]' % _("more")) + \ '
' return '''
%(header)s
%(body)s
''' % {'header' : _("Latest additions:"), 'body' : body, } def tmpl_searchwithin_select(self, ln, fieldname, selected, values): """ Produces 'search within' selection box for the current collection. Parameters: - 'ln' *string* - The language to display - 'fieldname' *string* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ out = '""" return out def tmpl_select(self, fieldname, values, selected=None, css_class=''): """ Produces a generic select box Parameters: - 'css_class' *string* - optional, a css class to display this select with - 'fieldname' *list* - the name of the select box produced - 'selected' *string* - which of the values is selected - 'values' *list* - the list of values in the select """ if css_class != '': class_field = ' class="%s"' % css_class else: class_field = '' out = '""" return out def tmpl_record_links(self, recid, ln): """ Displays the *More info* and *Find similar* links for a record Parameters: - 'ln' *string* - The language to display - 'recid' *string* - the id of the displayed record """ # load the right message language _ = gettext_set_language(ln) out = '''
%(detailed)s - %(similar)s''' % { 'detailed': create_html_link(self.build_search_url(recid=recid, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}), 'similar': create_html_link(self.build_search_url(p="recid:%d" % recid, rm='wrd', ln=ln), {}, _("Similar records"), {'class': "moreinfo"})} if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recid) if num_timescited: out += ''' - %s ''' % \ create_html_link(self.build_search_url(p='recid:%d' % recid, rm='citation', ln=ln), {}, _("Cited by %i records") % num_timescited, {'class': "moreinfo"}) return out def tmpl_record_body(self, titles, authors, dates, rns, abstracts, urls_u, urls_z, ln): """ Displays the "HTML basic" format of a record Parameters: - 'authors' *list* - the authors (as strings) - 'dates' *list* - the dates of publication - 'rns' *list* - the quicknotes for the record - 'abstracts' *list* - the abstracts for the record - 'urls_u' *list* - URLs to the original versions of the record - 'urls_z' *list* - Not used """ out = "" for title in titles: out += "%(title)s " % { 'title' : cgi.escape(title) } if authors: out += " / " for author in authors[:CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD]: out += '%s; ' % \ create_html_link(self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author)) if len(authors) > CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD: out += "et al" for date in dates: out += " %s." % cgi.escape(date) for rn in rns: out += """ [%(rn)s]""" % {'rn' : cgi.escape(rn)} for abstract in abstracts: out += "
%(abstract)s [...]" % {'abstract' : cgi.escape(abstract[:1+string.find(abstract, '.')]) } for idx in range(0, len(urls_u)): out += """
%(name)s""" % { 'url' : urls_u[idx], 'name' : urls_u[idx] } return out def tmpl_search_in_bibwords(self, p, f, ln, nearest_box): """ Displays the *Words like current ones* links for a search Parameters: - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'nearest_box' *string* - the HTML code for the "nearest_terms" box - most probably from a create_nearest_terms_box call """ # load the right message language _ = gettext_set_language(ln) out = '

' if f: out += _("Words nearest to %(x_word)s inside %(x_field)s in any collection are:") % {'x_word': '' + cgi.escape(p) + '', 'x_field': '' + cgi.escape(f) + ''} else: out += _("Words nearest to %(x_word)s in any collection are:") % {'x_word': '' + cgi.escape(p) + ''} out += '
' + nearest_box + '

' return out def tmpl_nearest_term_box(self, p, ln, f, terminfo, intro): """ Displays the *Nearest search terms* box Parameters: - 'p' *string* - Current search words - 'f' *string* - a collection description (if the search has been completed in a collection) - 'ln' *string* - The language to display - 'terminfo': tuple (term, hits, argd) for each near term - 'intro' *string* - the intro HTML to prefix the box with """ out = '''''' for term, hits, argd in terminfo: if hits: hitsinfo = str(hits) else: hitsinfo = '-' term = cgi.escape(term) if term == p: # print search word for orientation: nearesttermsboxbody_class = "nearesttermsboxbodyselected" if hits > 0: term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearesttermsselected"}) else: nearesttermsboxbody_class = "nearesttermsboxbody" term = create_html_link(self.build_search_url(argd), {}, term, {'class': "nearestterms"}) out += '''\ ''' % {'hits': hitsinfo, 'nearesttermsboxbody_class': nearesttermsboxbody_class, 'term': term} out += "
%(hits)s   %(term)s
" return intro + "
" + out + "
" def tmpl_browse_pattern(self, f, fn, ln, browsed_phrases_in_colls, colls): """ Displays the *Nearest search terms* box Parameters: - 'f' *string* - field (*not* i18nized) - 'fn' *string* - field name (i18nized) - 'ln' *string* - The language to display - 'browsed_phrases_in_colls' *array* - the phrases to display - 'colls' *array* - the list of collection parameters of the search (c's) """ # load the right message language _ = gettext_set_language(ln) out = """""" % { 'hits' : _("Hits"), 'fn' : cgi.escape(fn) } if len(browsed_phrases_in_colls) == 1: # one hit only found: phrase, nbhits = browsed_phrases_in_colls[0][0], browsed_phrases_in_colls[0][1] query = {'c': colls, 'ln': ln, 'p': phrase, 'f': f} out += """""" % {'nbhits': nbhits, 'link': create_html_link(self.build_search_url(query), {}, cgi.escape(phrase))} elif len(browsed_phrases_in_colls) > 1: # first display what was found but the last one: for phrase, nbhits in browsed_phrases_in_colls[:-1]: query = {'c': colls, 'ln': ln, 'p': phrase, 'f': f} out += """""" % {'nbhits' : nbhits, 'link': create_html_link(self.build_search_url(query), {}, cgi.escape(phrase))} # now display last hit as "next term": phrase, nbhits = browsed_phrases_in_colls[-1] query = {'c': colls, 'ln': ln, 'p': phrase, 'f': f} out += """""" % {'link': create_html_link(self.build_search_url(query, action='browse'), {}, _("next")), 'siteurl' : CFG_SITE_URL} out += """
%(hits)s   %(fn)s
%(nbhits)s   %(link)s
%(nbhits)s   %(link)s
  %(link)s
""" return out def tmpl_search_box(self, ln, as, cc, cc_intl, ot, sp, action, fieldslist, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, rm, p, f, coll_selects, d1y, d2y, d1m, d2m, d1d, d2d, dt, sort_fields, sf, so, ranks, sc, rg, formats, of, pl, jrec, ec, show_colls=True): """ Displays the *Nearest search terms* box Parameters: - 'ln' *string* - The language to display - 'as' *bool* - Should we display an advanced search box? -1 -> 1, from simpler to more advanced - 'cc_intl' *string* - the i18nized current collection name, used for display - 'cc' *string* - the internal current collection name - 'ot', 'sp' *string* - hidden values - 'action' *string* - the action demanded by the user - 'fieldslist' *list* - the list of all fields available, for use in select within boxes in advanced search - 'p, f, f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2, op3, rm' *strings* - the search parameters - 'coll_selects' *array* - a list of lists, each containing the collections selects to display - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* - the dates' types (creation dates, modification dates) - 'sort_fields' *array* - the select information for the sort fields - 'sf' *string* - the currently selected sort field - 'so' *string* - the currently selected sort order ("a" or "d") - 'ranks' *array* - ranking methods - 'rm' *string* - selected ranking method - 'sc' *string* - split by collection or not - 'rg' *string* - selected results/page - 'formats' *array* - available output formats - 'of' *string* - the selected output format - 'pl' *string* - `limit to' search pattern - show_colls *bool* - show cc_intl in page title, and propose coll selection box? """ # load the right message language _ = gettext_set_language(ln) # These are hidden fields the user does not manipulate # directly if as == -1: argd = drop_default_urlargd({ 'ln': ln, 'as': as, 'ot': ot, 'sp': sp, 'ec': ec, }, self.search_results_default_urlargd) else: argd = drop_default_urlargd({ 'cc': cc, 'ln': ln, 'as': as, 'ot': ot, 'sp': sp, 'ec': ec, }, self.search_results_default_urlargd) out = "" if show_colls: # display cc name if asked for out += '''

%(ccname)s

''' % {'ccname' : cgi.escape(cc_intl),} out += '''
''' % {'siteurl' : CFG_SITE_URL} # Only add non-default hidden values for field, value in argd.items(): out += self.tmpl_input_hidden(field, value) leadingtext = _("Search") if action == 'browse': leadingtext = _("Browse") if as == 1: # print Advanced Search form: # define search box elements: out += ''' ''' % { 'simple_search': create_html_link(self.build_search_url(p=p1, f=f1, rm=rm, cc=cc, ln=ln, jrec=jrec, rg=rg), {}, _("Simple Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'matchbox1' : self.tmpl_matchtype_box('m1', m1, ln=ln), 'p1' : cgi.escape(p1,1), 'searchwithin1' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f1', selected = f1, values = self._add_mark_to_field(value = f1, fields = fieldslist, ln = ln) ), 'andornot1' : self.tmpl_andornot_box( name = 'op1', value = op1, ln = ln ), 'matchbox2' : self.tmpl_matchtype_box('m2', m2, ln=ln), 'p2' : cgi.escape(p2,1), 'searchwithin2' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f2', selected = f2, values = self._add_mark_to_field(value = f2, fields = fieldslist, ln = ln) ), 'andornot2' : self.tmpl_andornot_box( name = 'op2', value = op2, ln = ln ), 'matchbox3' : self.tmpl_matchtype_box('m3', m3, ln=ln), 'p3' : cgi.escape(p3,1), 'searchwithin3' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f3', selected = f3, values = self._add_mark_to_field(value = f3, fields = fieldslist, ln = ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } elif as == 0: # print Simple Search form: out += ''' ''' % { 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, - as=max(CFG_ENABLED_SEARCH_INTERFACES), + as=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f', selected = f, values = self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips") } else: # EXPERIMENTAL # print super-simple search form: search_in = '' if cc_intl != CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME): search_in = ''' ''' % {'search_in_collection_name': _("Search in %(x_collection_name)s") % \ {'x_collection_name': cgi.escape(cc_intl)}, 'collection_id': cc, 'root_collection_name': CFG_SITE_NAME, 'search_everywhere': _("Search everywhere")} out += '''
%(search_tips)s
%(advanced_search)s
%(search_in)s ''' % { 'advanced_search': create_html_link(self.build_search_url(p1=p, f1=f, rm=rm, - as=max(CFG_ENABLED_SEARCH_INTERFACES), + as=max(CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES), cc=cc, jrec=jrec, ln=ln, rg=rg), {}, _("Advanced Search")), 'leading' : leadingtext, 'sizepattern' : CFG_WEBSEARCH_SUPERSIMPLESEARCH_PATTERN_BOX_WIDTH, 'p' : cgi.escape(p, 1), 'searchwithin' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'f', selected = f, values = self._add_mark_to_field(value=f, fields=fieldslist, ln=ln) ), 'search' : _("Search"), 'browse' : _("Browse"), 'siteurl' : CFG_SITE_URL, 'ln' : ln, 'langlink': ln != CFG_SITE_LANG and '?ln=' + ln or '', 'search_tips': _("Search Tips"), 'search_in': search_in } ## secondly, print Collection(s) box: if show_colls and as > -1: # display collections only if there is more than one selects = '' for sel in coll_selects: selects += self.tmpl_select(fieldname='c', values=sel) out += """ """ % { 'leading' : leadingtext, 'msg_coll' : _("collections"), 'colls' : selects, } ## thirdly, print search limits, if applicable: if action != _("Browse") and pl: out += """""" % { 'limitto' : _("Limit to:"), 'sizepattern' : CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH, 'pl' : cgi.escape(pl, 1), } ## fourthly, print from/until date boxen, if applicable: if action == _("Browse") or (d1y==0 and d1m==0 and d1d==0 and d2y==0 and d2m==0 and d2d==0): pass # do not need it else: cell_6_a = self.tmpl_inputdatetype(dt, ln) + self.tmpl_inputdate("d1", ln, d1y, d1m, d1d) cell_6_b = self.tmpl_inputdate("d2", ln, d2y, d2m, d2d) out += """""" % { 'added' : _("Added/modified since:"), 'until' : _("until:"), 'added_or_modified': self.tmpl_inputdatetype(dt, ln), 'date1' : self.tmpl_inputdate("d1", ln, d1y, d1m, d1d), 'date2' : self.tmpl_inputdate("d2", ln, d2y, d2m, d2d), } ## fifthly, print Display results box, including sort/rank, formats, etc: if action != _("Browse") and as > -1: rgs = [] for i in [10, 25, 50, 100, 250, 500]: rgs.append({ 'value' : i, 'text' : "%d %s" % (i, _("results"))}) # sort by: out += """""" % { 'sort_by' : _("Sort by:"), 'display_res' : _("Display results:"), 'out_format' : _("Output format:"), 'select_sf' : self.tmpl_select(fieldname = 'sf', values = sort_fields, selected = sf, css_class = 'address'), 'select_so' : self.tmpl_select(fieldname = 'so', values = [{ 'value' : 'a', 'text' : _("asc.") }, { 'value' : 'd', 'text' : _("desc.") }], selected = so, css_class = 'address'), 'select_rm' : self.tmpl_select(fieldname = 'rm', values = ranks, selected = rm, css_class = 'address'), 'select_rg' : self.tmpl_select(fieldname = 'rg', values = rgs, selected = rg, css_class = 'address'), 'select_sc' : self.tmpl_select(fieldname = 'sc', values = [{ 'value' : 0, 'text' : _("single list") }, { 'value' : 1, 'text' : _("split by collection") }], selected = sc, css_class = 'address'), 'select_of' : self.tmpl_searchwithin_select( ln = ln, fieldname = 'of', selected = of, values = self._add_mark_to_field(value = of, fields = formats, chars = 3, ln = ln) ), } ## last but not least, print end of search box: out += """
""" return out def tmpl_input_hidden(self, name, value): "Produces the HTML code for a hidden field " if isinstance(value, list): list_input = [self.tmpl_input_hidden(name, val) for val in value] return "\n".join(list_input) return """""" % { 'name' : cgi.escape(str(name), 1), 'value' : cgi.escape(str(value), 1), } def _add_mark_to_field(self, value, fields, ln, chars = 1): """Adds the current value as a MARC tag in the fields array Useful for advanced search""" # load the right message language _ = gettext_set_language(ln) out = fields if value and str(value[0:chars]).isdigit(): out.append({'value' : value, 'text' : str(value) + " " + _("MARC tag") }) return out def tmpl_search_pagestart(self, ln) : "page start for search page. Will display after the page header" return """
""" def tmpl_search_pageend(self, ln) : "page end for search page. Will display just before the page footer" return """
""" def tmpl_print_warning(self, msg, type, prologue, epilogue): """Prints warning message and flushes output. Parameters: - 'msg' *string* - The message string - 'type' *string* - the warning type - 'prologue' *string* - HTML code to display before the warning - 'epilogue' *string* - HTML code to display after the warning """ out = '\n%s' % (prologue) if type: out += '%s: ' % type out += '%s%s' % (msg, epilogue) return out def tmpl_print_search_info(self, ln, middle_only, collection, collection_name, collection_id, as, sf, so, rm, rg, nb_found, of, ot, p, f, f1, f2, f3, m1, m2, m3, op1, op2, p1, p2, p3, d1y, d1m, d1d, d2y, d2m, d2d, dt, all_fieldcodes, cpu_time, pl_in_url, jrec, sc, sp): """Prints stripe with the information on 'collection' and 'nb_found' results and CPU time. Also, prints navigation links (beg/next/prev/end) inside the results set. If middle_only is set to 1, it will only print the middle box information (beg/netx/prev/end/etc) links. This is suitable for displaying navigation links at the bottom of the search results page. Parameters: - 'ln' *string* - The language to display - 'middle_only' *bool* - Only display parts of the interface - 'collection' *string* - the collection name - 'collection_name' *string* - the i18nized current collection name - 'as' *bool* - if we display the advanced search interface - 'sf' *string* - the currently selected sort format - 'so' *string* - the currently selected sort order ("a" or "d") - 'rm' *string* - selected ranking method - 'rg' *int* - selected results/page - 'nb_found' *int* - number of results found - 'of' *string* - the selected output format - 'ot' *string* - hidden values - 'p' *string* - Current search words - 'f' *string* - the fields in which the search was done - 'f1, f2, f3, m1, m2, m3, p1, p2, p3, op1, op2' *strings* - the search parameters - 'jrec' *int* - number of first record on this page - 'd1y, d2y, d1m, d2m, d1d, d2d' *int* - the search between dates - 'dt' *string* the dates' type (creation date, modification date) - 'all_fieldcodes' *array* - all the available fields - 'cpu_time' *float* - the time of the query in seconds """ # load the right message language _ = gettext_set_language(ln) out = "" # left table cells: print collection name if not middle_only: out += '''
''' % { 'collection_id': collection_id, 'siteurl' : CFG_SITE_URL, 'collection_link': create_html_link(self.build_search_interface_url(c=collection, as=as, ln=ln), {}, cgi.escape(collection_name)) } else: out += """
""" % { 'siteurl' : CFG_SITE_URL } # middle table cell: print beg/next/prev/end arrows: if not middle_only: out += """
" else: out += "" # right table cell: cpu time info if not middle_only: if cpu_time > -1: out += """""" % { 'time' : _("Search took %s seconds.") % ('%.2f' % cpu_time), } out += "
%(collection_link)s %(recs_found)s  """ % { 'recs_found' : _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') } else: out += "" if nb_found > rg: out += "" + cgi.escape(collection_name) + " : " + _("%s records found") % ('' + self.tmpl_nice_number(nb_found, ln) + '') + "   " if nb_found > rg: # navig.arrows are needed, since we have many hits query = {'p': p, 'f': f, 'cc': collection, 'sf': sf, 'so': so, 'sp': sp, 'rm': rm, 'of': of, 'ot': ot, 'as': as, 'ln': ln, 'p1': p1, 'p2': p2, 'p3': p3, 'f1': f1, 'f2': f2, 'f3': f3, 'm1': m1, 'm2': m2, 'm3': m3, 'op1': op1, 'op2': op2, 'sc': 0, 'd1y': d1y, 'd1m': d1m, 'd1d': d1d, 'd2y': d2y, 'd2m': d2m, 'd2d': d2d, 'dt': dt, } # @todo here def img(gif, txt): return '%(txt)s' % { 'txt': txt, 'gif': gif, 'siteurl': CFG_SITE_URL} if jrec-rg > 1: out += create_html_link(self.build_search_url(query, jrec=1, rg=rg), {}, img('sb', _("begin")), {'class': 'img'}) if jrec > 1: out += create_html_link(self.build_search_url(query, jrec=max(jrec-rg, 1), rg=rg), {}, img('sp', _("previous")), {'class': 'img'}) if jrec+rg-1 < nb_found: out += "%d - %d" % (jrec, jrec+rg-1) else: out += "%d - %d" % (jrec, nb_found) if nb_found >= jrec+rg: out += create_html_link(self.build_search_url(query, jrec=jrec+rg, rg=rg), {}, img('sn', _("next")), {'class':'img'}) if nb_found >= jrec+rg+rg: out += create_html_link(self.build_search_url(query, jrec=nb_found-rg+1, rg=rg), {}, img('se', _("end")), {'class': 'img'}) # still in the navigation part cc = collection sc = 0 for var in ['p', 'cc', 'f', 'sf', 'so', 'of', 'rg', 'as', 'ln', 'p1', 'p2', 'p3', 'f1', 'f2', 'f3', 'm1', 'm2', 'm3', 'op1', 'op2', 'sc', 'd1y', 'd1m', 'd1d', 'd2y', 'd2m', 'd2d', 'dt']: out += self.tmpl_input_hidden(name = var, value = vars()[var]) for var in ['ot', 'sp', 'rm']: if vars()[var]: out += self.tmpl_input_hidden(name = var, value = vars()[var]) if pl_in_url: fieldargs = cgi.parse_qs(pl_in_url) for fieldcode in all_fieldcodes: # get_fieldcodes(): if fieldargs.has_key(fieldcode): for val in fieldargs[fieldcode]: out += self.tmpl_input_hidden(name = fieldcode, value = val) out += """  %(jump)s """ % { 'jump' : _("jump to record:"), 'jrec' : jrec, } if not middle_only: out += "%(time)s 
" else: out += "" out += "
" return out def tmpl_nice_number(self, number, ln=CFG_SITE_LANG, thousands_separator=',', max_ndigits_after_dot=None): """ Return nicely printed number NUMBER in language LN using given THOUSANDS_SEPARATOR character. If max_ndigits_after_dot is specified and the number is float, the number is rounded by taking in consideration up to max_ndigits_after_dot digit after the dot. This version does not pay attention to locale. See tmpl_nice_number_via_locale(). """ if type(number) is float: if max_ndigits_after_dot is not None: number = round(number, max_ndigits_after_dot) int_part, frac_part = str(number).split('.') return '%s.%s' % (self.tmpl_nice_number(int(int_part), ln, thousands_separator), frac_part) else: chars_in = list(str(number)) number = len(chars_in) chars_out = [] for i in range(0, number): if i % 3 == 0 and i != 0: chars_out.append(thousands_separator) chars_out.append(chars_in[number-i-1]) chars_out.reverse() return ''.join(chars_out) def tmpl_nice_number_via_locale(self, number, ln=CFG_SITE_LANG): """ Return nicely printed number NUM in language LN using the locale. See also version tmpl_nice_number(). """ if number is None: return None # Temporarily switch the numeric locale to the requested one, and format the number # In case the system has no locale definition, use the vanilla form ol = locale.getlocale(locale.LC_NUMERIC) try: locale.setlocale(locale.LC_NUMERIC, self.tmpl_localemap.get(ln, self.tmpl_default_locale)) except locale.Error: return str(number) try: number = locale.format('%d', number, True) except TypeError: return str(number) locale.setlocale(locale.LC_NUMERIC, ol) return number def tmpl_record_format_htmlbrief_header(self, ln): """Returns the header of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_footer(..), tmpl_record_format_htmlbrief_body(..) Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = """
""" % { 'siteurl' : CFG_SITE_URL, } return out def tmpl_record_format_htmlbrief_footer(self, ln): """Returns the footer of the search results list when output is html brief. Note that this function is called for each collection results when 'split by collection' is enabled. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_body(..) Parameters: - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = """

""" % { 'basket' : _("ADD TO BASKET") } return out def tmpl_record_format_htmlbrief_body(self, ln, recid, row_number, relevance, record, relevances_prologue, relevances_epilogue): """Returns the html brief format of one record. Used in the search results list for each record. See also: tmpl_record_format_htmlbrief_header(..), tmpl_record_format_htmlbrief_footer(..) Parameters: - 'ln' *string* - The language to display - 'row_number' *int* - The position of this record in the list - 'recid' *int* - The recID - 'relevance' *string* - The relevance of the record - 'record' *string* - The formatted record - 'relevances_prologue' *string* - HTML code to prepend the relevance indicator - 'relevances_epilogue' *string* - HTML code to append to the relevance indicator (used mostly for formatting) """ # load the right message language _ = gettext_set_language(ln) out = """ %(number)s. """ % {'recid': recid, 'number': row_number} if relevance: out += """
""" % { 'prologue' : relevances_prologue, 'epilogue' : relevances_epilogue, 'relevance' : relevance } out += """%s""" % record return out def tmpl_print_results_overview(self, ln, results_final_nb_total, cpu_time, results_final_nb, colls, ec): """Prints results overview box with links to particular collections below. Parameters: - 'ln' *string* - The language to display - 'results_final_nb_total' *int* - The total number of hits for the query - 'colls' *array* - The collections with hits, in the format: - 'coll[code]' *string* - The code of the collection (canonical name) - 'coll[name]' *string* - The display name of the collection - 'results_final_nb' *array* - The number of hits, indexed by the collection codes: - 'cpu_time' *string* - The time the query took - 'url_args' *string* - The rest of the search query - 'ec' *array* - selected external collections """ if len(colls) == 1 and not ec: # if one collection only and no external collections, print nothing: return "" # load the right message language _ = gettext_set_language(ln) # first find total number of hits: out = """
%(founds)s
""" % { 'founds' : _("%(x_fmt_open)sResults overview:%(x_fmt_close)s Found %(x_nb_records)s records in %(x_nb_seconds)s seconds.") %\ {'x_fmt_open': '', 'x_fmt_close': '', 'x_nb_records': '' + self.tmpl_nice_number(results_final_nb_total, ln) + '', 'x_nb_seconds': '%.2f' % cpu_time} } # then print hits per collection: for coll in colls: if results_final_nb.has_key(coll['code']) and results_final_nb[coll['code']] > 0: out += '''%(coll_name)s, %(number)s
''' % { 'coll' : coll['id'], 'coll_name' : cgi.escape(coll['name']), 'number' : _("%s records found") % ('' + self.tmpl_nice_number(results_final_nb[coll['code']], ln) + '') } out += "
" return out def tmpl_print_searchresultbox(self, header, body): """print a nicely formatted box for search results """ #_ = gettext_set_language(ln) # first find total number of hits: out = '
'+header+'
'+body+'
' return out def tmpl_search_no_boolean_hits(self, ln, nearestterms): """No hits found, proposes alternative boolean queries Parameters: - 'ln' *string* - The language to display - 'nearestterms' *array* - Parts of the interface to display, in the format: - 'nearestterms[nbhits]' *int* - The resulting number of hits - 'nearestterms[url_args]' *string* - The search parameters - 'nearestterms[p]' *string* - The search terms """ # load the right message language _ = gettext_set_language(ln) out = _("Boolean query returned no hits. Please combine your search terms differently.") out += '''
''' for term, hits, argd in nearestterms: out += '''\ ''' % {'hits' : hits, 'link': create_html_link(self.build_search_url(argd), {}, cgi.escape(term), {'class': "nearestterms"})} out += """
%(hits)s   %(link)s
""" return out def tmpl_similar_author_names(self, authors, ln): """No hits found, proposes alternative boolean queries Parameters: - 'authors': a list of (name, hits) tuples - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) out = ''' ''' % { 'similar' : _("See also: similar author names") } for author, hits in authors: out += '''\ ''' % {'link': create_html_link( self.build_search_url(p=author, f='author', ln=ln), {}, cgi.escape(author), {'class':"google"}), 'nb' : hits} out += """
%(similar)s
%(nb)d %(link)s
""" return out def tmpl_print_record_detailed(self, recID, ln): """Displays a detailed on-the-fly record Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ # okay, need to construct a simple "Detailed record" format of our own: out = "

 " # secondly, title: titles = get_fieldvalues(recID, "245__a") for title in titles: out += "

%s

" % cgi.escape(title) # thirdly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") if authors: out += "

" for author in authors: out += '%s; ' % create_html_link(self.build_search_url( ln=ln, p=author, f='author'), {}, cgi.escape(author)) out += "

" # fourthly, date of creation: dates = get_fieldvalues(recID, "260__c") for date in dates: out += "

%s

" % date # fifthly, abstract: abstracts = get_fieldvalues(recID, "520__a") for abstract in abstracts: out += """

Abstract: %s

""" % abstract # fifthly bis, keywords: keywords = get_fieldvalues(recID, "6531_a") if len(keywords): out += """

Keyword(s):""" for keyword in keywords: out += '%s; ' % create_html_link( self.build_search_url(ln=ln, p=keyword, f='keyword'), {}, cgi.escape(keyword)) out += '

' # fifthly bis bis, published in: prs_p = get_fieldvalues(recID, "909C4p") prs_v = get_fieldvalues(recID, "909C4v") prs_y = get_fieldvalues(recID, "909C4y") prs_n = get_fieldvalues(recID, "909C4n") prs_c = get_fieldvalues(recID, "909C4c") for idx in range(0, len(prs_p)): out += """

Publ. in: %s""" % prs_p[idx] if prs_v and prs_v[idx]: out += """%s""" % prs_v[idx] if prs_y and prs_y[idx]: out += """(%s)""" % prs_y[idx] if prs_n and prs_n[idx]: out += """, no.%s""" % prs_n[idx] if prs_c and prs_c[idx]: out += """, p.%s""" % prs_c[idx] out += """.

""" # sixthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") for idx in range(0, len(urls_u)): link_text = "URL" try: if urls_z[idx]: link_text = urls_z[idx] except IndexError: pass out += """

%s: %s

""" % (link_text, urls_u[idx], urls_u[idx]) # print some white space at the end: out += "

" return out def tmpl_print_record_list_for_similarity_boxen(self, title, recID_score_list, ln=CFG_SITE_LANG): """Print list of records in the "hs" (HTML Similarity) format for similarity boxes. RECID_SCORE_LIST is a list of (recID1, score1), (recID2, score2), etc. """ from invenio.search_engine import print_record, record_public_p recID_score_list_to_be_printed = [] # firstly find 5 first public records to print: nb_records_to_be_printed = 0 nb_records_seen = 0 while nb_records_to_be_printed < 5 and nb_records_seen < len(recID_score_list) and nb_records_seen < 50: # looking through first 50 records only, picking first 5 public ones (recID, score) = recID_score_list[nb_records_seen] nb_records_seen += 1 if record_public_p(recID): nb_records_to_be_printed += 1 recID_score_list_to_be_printed.append([recID, score]) # secondly print them: out = '''
%(title)s
''' % { 'title': cgi.escape(title) } for recid, score in recID_score_list_to_be_printed: out += ''' ''' % { 'score': score, 'info' : print_record(recid, format="hs", ln=ln), } out += """
(%(score)s)  %(info)s
""" return out def tmpl_print_record_brief(self, ln, recID): """Displays a brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ out = "" # record 'recID' does not exist in format 'format', so print some default format: # firstly, title: titles = get_fieldvalues(recID, "245__a") # secondly, authors: authors = get_fieldvalues(recID, "100__a") + get_fieldvalues(recID, "700__a") # thirdly, date of creation: dates = get_fieldvalues(recID, "260__c") # thirdly bis, report numbers: rns = get_fieldvalues(recID, "037__a") rns = get_fieldvalues(recID, "088__a") # fourthly, beginning of abstract: abstracts = get_fieldvalues(recID, "520__a") # fifthly, fulltext link: urls_z = get_fieldvalues(recID, "8564_z") urls_u = get_fieldvalues(recID, "8564_u") return self.tmpl_record_body( titles = titles, authors = authors, dates = dates, rns = rns, abstracts = abstracts, urls_u = urls_u, urls_z = urls_z, ln=ln) def tmpl_print_record_brief_links(self, ln, recID): """Displays links for brief record on-the-fly Parameters: - 'ln' *string* - The language to display - 'recID' *int* - The record id """ # load the right message language _ = gettext_set_language(ln) out = '
' if CFG_WEBSEARCH_USE_ALEPH_SYSNOS: alephsysnos = get_fieldvalues(recID, "970__a") if len(alephsysnos)>0: alephsysno = alephsysnos[0] out += '%s' % \ create_html_link(self.build_search_url(sysno=alephsysno, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '%s' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) else: out += '%s' % \ create_html_link(self.build_search_url(recid=recID, ln=ln), {}, _("Detailed record"), {'class': "moreinfo"}) out += ' - %s' % \ create_html_link(self.build_search_url(p="recid:%d" % recID, rm="wrd", ln=ln), {}, _("Similar records"), {'class': "moreinfo"}) if CFG_BIBRANK_SHOW_CITATION_LINKS: num_timescited = get_cited_by_count(recID) if num_timescited: out += ' - %s' % \ create_html_link(self.build_search_url(p="recid:%d" % recID, rm="citation", ln=ln), {}, _("Cited by %i records") % num_timescited, {'class': "moreinfo"}) out+='
' return out def tmpl_xml_rss_prologue(self, current_url=None, previous_url=None, next_url=None): """Creates XML RSS 2.0 prologue.""" out = """ %(sitename)s %(siteurl)s %(sitename)s latest documents %(sitelang)s %(timestamp)s CDS Invenio %(version)s %(sitesupportemail)s %(timetolive)s%(previous_link)s%(next_link)s%(current_link)s %(siteurl)s/img/cds.png %(sitename)s %(siteurl)s Search Search this site: p %(siteurl)s/search """ % {'sitename': CFG_SITE_NAME, 'siteurl': CFG_SITE_URL, 'sitelang': CFG_SITE_LANG, 'timestamp': time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime()), 'version': CFG_VERSION, 'sitesupportemail': CFG_SITE_SUPPORT_EMAIL, 'timetolive': CFG_WEBSEARCH_RSS_TTL, 'current_link': (current_url and \ '\n\n' % current_url) or '', 'previous_link': (previous_url and \ '\n' % previous_url) or '', 'next_link': (next_url and \ '\n \n""" return out def tmpl_xml_nlm_prologue(self): """Creates XML NLM prologue.""" out = """\n""" return out def tmpl_xml_nlm_epilogue(self): """Creates XML NLM epilogue.""" out = """\n""" return out def tmpl_xml_marc_prologue(self): """Creates XML MARC prologue.""" out = """\n""" return out def tmpl_xml_marc_epilogue(self): """Creates XML MARC epilogue.""" out = """\n""" return out def tmpl_xml_default_prologue(self): """Creates XML default format prologue. (Sanity calls only.)""" out = """\n""" return out def tmpl_xml_default_epilogue(self): """Creates XML default format epilogue. (Sanity calls only.)""" out = """\n""" return out def tmpl_collection_not_found_page_title(self, colname, ln=CFG_SITE_LANG): """ Create page title for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = _("Collection %s Not Found") % cgi.escape(colname) return out def tmpl_collection_not_found_page_body(self, colname, ln=CFG_SITE_LANG): """ Create page body for cases when unexisting collection was asked for. """ _ = gettext_set_language(ln) out = """

%(title)s

%(sorry)s

%(you_may_want)s

""" % { 'title': self.tmpl_collection_not_found_page_title(colname, ln), 'sorry': _("Sorry, collection %s does not seem to exist.") % \ ('' + cgi.escape(colname) + ''), 'you_may_want': _("You may want to start browsing from %s.") % \ ('' + \ cgi.escape(CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME)) + '')} return out def tmpl_alert_rss_teaser_box_for_query(self, id_query, ln): """Propose teaser for setting up this query as alert or RSS feed. Parameters: - 'id_query' *int* - ID of the query we make teaser for - 'ln' *string* - The language to display """ # load the right message language _ = gettext_set_language(ln) # get query arguments: res = run_sql("SELECT urlargs FROM query WHERE id=%s", (id_query,)) argd = {} if res: argd = cgi.parse_qs(res[0][0]) rssurl = self.build_rss_url(argd) alerturl = CFG_SITE_URL + '/youralerts/input?ln=%s&idq=%s' % (ln, id_query) out = '''
%(similar)s
%(msg_alert)s
''' % { 'similar' : _("Interested in being notified about new results for this query?"), 'msg_alert': _("""Set up a personal %(x_url1_open)semail alert%(x_url1_close)s or subscribe to the %(x_url2_open)sRSS feed%(x_url2_close)s.""") % \ {'x_url1_open': ' ' % (alerturl, CFG_SITE_URL) + ' ' % (alerturl), 'x_url1_close': '', 'x_url2_open': ' ' % (rssurl, CFG_SITE_URL) + ' ' % rssurl, 'x_url2_close': '', }} return out def tmpl_detailed_record_metadata(self, recID, ln, format, content, creationdate=None, modificationdate=None): """Returns the main detailed page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'format' *string* - The format in used to print the record - 'content' *string* - The main content of the page - 'creationdate' *string* - The creation date of the printed record - 'modificationdate' *string* - The last modification date of the printed record """ _ = gettext_set_language(ln) out = content return out def tmpl_detailed_record_statistics(self, recID, ln, downloadsimilarity, downloadhistory, viewsimilarity): """Returns the statistics page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - downloadsimilarity *string* - downloadsimilarity box - downloadhistory *string* - downloadhistory box - viewsimilarity *string* - viewsimilarity box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_DOWNLOAD_STATS and downloadsimilarity is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("People who downloaded this document also downloaded:"), downloadsimilarity, ln) out = '' out += ''' ''' % { 'siteurl': CFG_SITE_URL, 'recid': recID, 'ln': ln, 'similar': similar, 'more': _("more"), 'graph': downloadsimilarity } out += '
%(graph)s
%(similar)s
' out += '
' if CFG_BIBRANK_SHOW_READING_STATS and viewsimilarity is not None: out += self.tmpl_print_record_list_for_similarity_boxen ( _("People who viewed this page also viewed:"), viewsimilarity, ln) if CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS and downloadhistory is not None: out += downloadhistory + '
' return out def tmpl_detailed_record_citations_prologue(self, recID, ln): """Returns the prologue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '' def tmpl_detailed_record_citations_epilogue(self, recID, ln): """Returns the epilogue of the citations page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display """ return '
' def tmpl_detailed_record_citations_citing_list(self, recID, ln, citinglist): """Returns the list of record citing this one Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citinglist *list* - a list of tuples [(x1,y1),(x2,y2),..] where x is doc id and y is number of citations """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and citinglist is not None: similar = self.tmpl_print_record_list_for_similarity_boxen( _("Cited by: %s records") % len (citinglist), citinglist, ln) out = ''' %(similar)s %(more)s

''' % { 'more': create_html_link( self.build_search_url(p='recid:%d' % \ recID, #XXXX rm='citation', ln=ln), {}, _("more")), 'similar': similar} return out def tmpl_detailed_record_citations_citation_history(self, recID, ln, citationhistory): """Returns the citations history graph of this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - citationhistory *string* - citationhistory box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and citationhistory is not None: out = '%s' % citationhistory return out def tmpl_detailed_record_citations_co_citing(self, recID, ln, cociting): """Returns the list of cocited records Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - cociting *string* - cociting box """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_STATS and cociting is not None: similar = self.tmpl_print_record_list_for_similarity_boxen ( _("Co-cited with: %s records") % len (cociting), cociting, ln) out = ''' %(similar)s %(more)s
''' % { 'more': create_html_link(self.build_search_url(p='cocitedwith:%d' % recID, ln=ln), {}, _("more")), 'similar': similar} return out def tmpl_detailed_record_citations_self_cited(self, recID, ln, selfcited, citinglist): """Returns the list of self-citations for this record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - selfcited list - a list of self-citations for recID """ # load the right message language _ = gettext_set_language(ln) out = '' if CFG_BIBRANK_SHOW_CITATION_GRAPHS and selfcited is not None: sc_scorelist = [] #a score list for print.. for s in selfcited: #copy weight from citations weight = 0 for c in citinglist: (crec, score) = c if crec == s: weight = score tmp = [s, weight] sc_scorelist.append(tmp) scite = self.tmpl_print_record_list_for_similarity_boxen ( _(".. of which self-citations: %s records") % len (selfcited), sc_scorelist, ln) out = ''+scite+'' return out def tmpl_author_information(self, req, pubs, authorname, num_downloads, aff_pubdict, citedbylist, kwtuples, authors, vtuples, ln): """Prints stuff about the author given as authorname. 1. Author name + his/her institutes. Each institute I has a link to papers where the auhtor has I as institute. 2. Publications, number: link to search by author. 3. Keywords 4. Author collabs 5. Publication venues like journals The parameters are data structures needed to produce 1-6, as follows: req - request pubs - list of recids, probably the records that have the author as an author authorname - evident num_downloads - evident aff_pubdict - a dictionary where keys are inst names and values lists of recordids citedbylist - list of recs that cite pubs kwtuples - keyword tuples like ('HIGGS BOSON',[3,4]) where 3 and 4 are recids authors - a list of authors that have collaborated with authorname """ _ = gettext_set_language(ln) #make a authoraff string that looks like CERN (1), Caltech (2) etc authoraff = "" for a in aff_pubdict.keys(): recids = "+or+".join(map(str, aff_pubdict[a])) print_a = a if (print_a == ' '): print_a = _("unknown") if authoraff: authoraff += '
' authoraff += ""+print_a+' ('+str(len(aff_pubdict[a]))+")" #print a "general" banner about the author req.write("

" + authorname + "

") #print affiliations line1 = "" + _("Affiliations:") + "" line2 = authoraff req.write(self.tmpl_print_searchresultbox(line1, line2)) #keywords, collaborations keywstr = "" collabstr = "" if (kwtuples): for (freq, kw) in kwtuples: if keywstr: keywstr += '
' #create a link in author=x, keyword=y searchstr = create_html_link(self.build_search_url( p='author:"' + authorname + '" ' + 'keyword:"' + kw + '"'), {}, kw+" ("+str(freq)+")",) keywstr = keywstr+" "+searchstr banner = self.tmpl_print_searchresultbox("" + _("Frequent keywords:") + "", keywstr) req.write(banner) if (authors): for c in authors: c = c.strip() if collabstr: collabstr += '
' #do not add this person him/herself in the list if not (c == authorname): collabstr = collabstr + " "+c+"" banner = self.tmpl_print_searchresultbox("" + _("Frequent co-authors:") + "", collabstr) req.write(banner) if (vtuples): #get the second member: that's the journal name pubinfo = "" for t in vtuples: (times, journal) = t pubinfo += journal+" ("+str(times)+") " banner = self.tmpl_print_searchresultbox(_("Publishes in"), pubinfo) req.write(banner) #print papers: searchstr = create_html_link(self.build_search_url(p=authorname, f='author'), {}, "All papers ("+str(len(pubs))+")",) line1 = "" + _("Papers:") + "" line2 = searchstr+" ("+_("downloaded")+" " line2 += str(num_downloads)+" "+_("times")+")" from invenio.search_engine import perform_request_search if CFG_INSPIRE_SITE: CFG_COLLS = ['Book', 'Conference', 'Introductory', 'Lectures', 'Preprint', 'Published', 'Report', 'Review', 'Thesis'] else: CFG_COLLS = ['Article', 'Book', 'Preprint',] for coll in CFG_COLLS: collsearch = intbitset(pubs) & intbitset(perform_request_search(p="collection:"+coll)) if len(collsearch) > 0: num = len(collsearch) line2 += "
" + create_html_link(self.build_search_url(p='author:"' + authorname + '" ' + \ 'collection:' + coll), {}, coll + " ("+str(num)+")",) banner = self.tmpl_print_searchresultbox(line1, line2) req.write(banner) # print citations: if len(citedbylist): line1 = "" + _("Citations:") + "" line2 = "" req.write(self.tmpl_print_searchresultbox(line1, line2)) # they will be printed after that def tmpl_detailed_record_references(self, recID, ln, content): """Returns the discussion page of a record Parameters: - 'recID' *int* - The ID of the printed record - 'ln' *string* - The language to display - 'content' *string* - The main content of the page """ # load the right message language _ = gettext_set_language(ln) out = '' if content is not None: out += content return out def tmpl_citesummary_prologue(self, d_total_recs, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, prologue. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """

""" % \ {'msg_title': _("Citation summary results"),} for coll, colldef in l_colls: out += '' % coll out += '' out += """""" % \ {'msg_recs': _("Total number of citable papers analyzed:"),} for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) if colldef: link_url += ' ' + quote(colldef) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_total_recs[coll], ln) out += '' % (link_url, link_text) out += '' return out def tmpl_citesummary_overview(self, d_total_cites, d_avg_cites, l_colls, ln=CFG_SITE_LANG): """HTML citesummary format, overview. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """""" % \ {'msg_cites': _("Total number of citations:"),} for coll, colldef in l_colls: out += '' % self.tmpl_nice_number(d_total_cites[coll], ln) out += '' out += """""" % \ {'msg_avgcit': _("Average citations per paper:"),} for coll, colldef in l_colls: out += '' % d_avg_cites[coll] out += '' out += """""" % \ {'msg_breakdown': _("Breakdown of papers by citations:"),} return out def tmpl_citesummary_breakdown_by_fame(self, d_cites, low, high, fame, l_colls, searchpattern, searchfield, ln=CFG_SITE_LANG): """HTML citesummary format, breakdown by fame. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """""" % \ {'fame': fame,} for coll, colldef in l_colls: link_url = CFG_SITE_URL + '/search?p=' if searchpattern: p = searchpattern if searchfield: if " " in searchpattern: p = searchfield + ':"' + searchpattern + '"' else: p = searchfield + ':' + searchpattern link_url += quote(p) + ' ' if colldef: link_url += quote(colldef) + ' ' if low == 0 and high == 0: link_url += quote('cited:0') else: link_url += quote('cited:%i->%i' % (low, high)) link_url += '&rm=citation'; link_text = self.tmpl_nice_number(d_cites[coll], ln) out += '' % (link_url, link_text) out += '' return out def tmpl_citesummary_epilogue(self, ln=CFG_SITE_LANG): """HTML citesummary format, epilogue. A part of HCS format suite.""" _ = gettext_set_language(ln) out = """
%(msg_title)s%s
%(msg_recs)s%s
%(msg_cites)s%s
%(msg_avgcit)s%.1f
%(msg_breakdown)s
%(fame)s%s
""" return out diff --git a/modules/websearch/lib/websearch_webcoll.py b/modules/websearch/lib/websearch_webcoll.py index e9fb8f74c..01bcdd4e9 100644 --- a/modules/websearch/lib/websearch_webcoll.py +++ b/modules/websearch/lib/websearch_webcoll.py @@ -1,929 +1,929 @@ ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Create CDS Invenio collection cache.""" __revision__ = "$Id$" import calendar import copy import sys import cgi import re import os import string import time from invenio.config import \ CFG_CERN_SITE, \ CFG_WEBSEARCH_INSTANT_BROWSE, \ CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, \ CFG_WEBSEARCH_I18N_LATEST_ADDITIONS, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_URL, \ - CFG_ENABLED_SEARCH_INTERFACES, \ - CFG_DEFAULT_SEARCH_INTERFACE + CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES, \ + CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE from invenio.messages import gettext_set_language, language_list_long from invenio.search_engine import HitSet, search_pattern, get_creation_date, get_field_i18nname, collection_restricted_p from invenio.dbquery import run_sql, Error, get_table_update_time from invenio.access_control_engine import acc_authorize_action from invenio.bibrank_record_sorter import get_bibrank_methods from invenio.dateutils import convert_datestruct_to_dategui from invenio.bibformat import format_record from invenio.websearch_external_collections import \ external_collection_load_states, \ dico_collection_external_searches, \ external_collection_sort_engine_by_name from invenio.bibtask import task_init, task_get_option, task_set_option, \ write_message, task_has_option, task_update_progress, \ task_sleep_now_if_required import invenio.template websearch_templates = invenio.template.load('websearch') ## global vars collection_house = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... # cfg_cache_last_updated_timestamp_tolerance -- cache timestamp # tolerance (in seconds), to account for the fact that an admin might # accidentally happen to edit the collection definitions at exactly # the same second when some webcoll process was about to be started. # In order to be safe, let's put an exaggerated timestamp tolerance # value such as 20 seconds: cfg_cache_last_updated_timestamp_tolerance = 20 # cfg_cache_last_updated_timestamp_file -- location of the cache # timestamp file: cfg_cache_last_updated_timestamp_file = "%s/collections/last_updated" % CFG_CACHEDIR def get_collection(colname): """Return collection object from the collection house for given colname. If does not exist, then create it.""" if not collection_house.has_key(colname): colobject = Collection(colname) collection_house[colname] = colobject return collection_house[colname] ## auxiliary functions: def mymkdir(newdir, mode=0777): """works the way a good mkdir should :) - already exists, silently complete - regular file in the way, raise an exception - parent directory(ies) does not exist, make them as well """ if os.path.isdir(newdir): pass elif os.path.isfile(newdir): raise OSError("a file with the same name as the desired " \ "dir, '%s', already exists." % newdir) else: head, tail = os.path.split(newdir) if head and not os.path.isdir(head): mymkdir(head, mode) if tail: os.umask(022) os.mkdir(newdir, mode) def is_selected(var, fld): "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." if var == fld: return ' selected="selected"' else: return "" def get_field(recID, tag): "Gets list of field 'tag' for the record with 'recID' system number." out = [] digit = tag[0:2] bx = "bib%sx" % digit bibx = "bibrec_bib%sx" % digit query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ % (bx, bibx, recID, tag) res = run_sql(query) for row in res: out.append(row[0]) return out class Collection: "Holds the information on collections (id,name,dbquery)." def __init__(self, name=""): "Creates collection instance by querying the DB configuration database about 'name'." self.calculate_reclist_run_already = 0 # to speed things up without much refactoring self.update_reclist_run_already = 0 # to speed things up without much refactoring self.reclist_with_nonpublic_subcolls = HitSet() if not name: self.name = CFG_SITE_NAME # by default we are working on the home page self.id = 1 self.dbquery = None self.nbrecs = None self.reclist = HitSet() else: self.name = name try: res = run_sql("""SELECT id,name,dbquery,nbrecs,reclist FROM collection WHERE name=%s""", (name,)) if res: self.id = res[0][0] self.name = res[0][1] self.dbquery = res[0][2] self.nbrecs = res[0][3] try: self.reclist = HitSet(res[0][4]) except: self.reclist = HitSet() else: # collection does not exist! self.id = None self.dbquery = None self.nbrecs = None self.reclist = HitSet() except Error, e: print "Error %d: %s" % (e.args[0], e.args[1]) sys.exit(1) def get_example_search_queries(self): """Returns list of sample search queries for this collection. """ res = run_sql("""SELECT example.body FROM example LEFT JOIN collection_example on example.id=collection_example.id_example WHERE collection_example.id_collection=%s ORDER BY collection_example.score""", (self.id,)) return [query[0] for query in res] def get_name(self, ln=CFG_SITE_LANG, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): """Return nicely formatted collection name for language LN. The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" out = prolog i18name = "" res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) try: i18name += res[0][0] except IndexError: pass if i18name: out += i18name else: out += self.name out += epilog return out def get_ancestors(self): "Returns list of ancestors of the current collection." ancestors = [] id_son = self.id while 1: query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) res = run_sql(query, None, 1) if res: col_ancestor = get_collection(res[0][1]) ancestors.append(col_ancestor) id_son = res[0][0] else: break ancestors.reverse() return ancestors def restricted_p(self): """Predicate to test if the collection is restricted or not. Return the contect of the `restrited' column of the collection table (typically Apache group). Otherwise return None if the collection is public.""" if collection_restricted_p(self.name): return 1 return None def get_sons(self, type='r'): "Returns list of direct sons of type 'type' for the current collection." sons = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type) res = run_sql(query) for row in res: sons.append(get_collection(row[1])) return sons def get_descendants(self, type='r'): "Returns list of all descendants of type 'type' for the current collection." descendants = [] id_dad = self.id query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type) res = run_sql(query) for row in res: col_desc = get_collection(row[1]) descendants.append(col_desc) descendants += col_desc.get_descendants() return descendants def write_cache_file(self, filename='', filebody=''): "Write a file inside collection cache." # open file: dirname = "%s/collections/%d" % (CFG_CACHEDIR, self.id) mymkdir(dirname) fullfilename = dirname + "/%s.html" % filename try: os.umask(022) f = open(fullfilename, "w") except IOError, v: try: (code, message) = v except: code = 0 message = v print "I/O Error: " + str(message) + " (" + str(code) + ")" sys.exit(1) # print user info: write_message("... creating %s" % fullfilename, verbose=6) sys.stdout.flush() # print page body: f.write(filebody) # close file: f.close() def update_webpage_cache(self): """Create collection page header, navtrail, body (including left and right stripes) and footer, and call write_cache_file() afterwards to update the collection webpage cache.""" ## precalculate latest additions for non-aggregate ## collections (the info is ln and as independent) if self.dbquery and not CFG_WEBSEARCH_I18N_LATEST_ADDITIONS: self.create_latest_additions_info() ## do this for each language: for lang, lang_fullname in language_list_long(): # but only if some concrete language was not chosen only: if task_get_option("language", lang) == lang: if self.dbquery and CFG_WEBSEARCH_I18N_LATEST_ADDITIONS: self.create_latest_additions_info(ln=lang) # load the right message language _ = gettext_set_language(lang) ## first, update navtrail: - for as in CFG_ENABLED_SEARCH_INTERFACES: + for as in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: self.write_cache_file("navtrail-as=%s-ln=%s" % (as, lang), self.create_navtrail_links(as, lang)) ## second, update page body: - for as in CFG_ENABLED_SEARCH_INTERFACES: # do super-simple, simple and advanced search pages: + for as in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: # do super-simple, simple and advanced search pages: body = websearch_templates.tmpl_webcoll_body( ln=lang, collection=self.name, te_portalbox = self.create_portalbox(lang, 'te'), searchfor = self.create_searchfor(as, lang), np_portalbox = self.create_portalbox(lang, 'np'), narrowsearch = self.create_narrowsearch(as, lang, 'r'), focuson = self.create_narrowsearch(as, lang, "v") + \ self.create_external_collections_box(lang), instantbrowse = self.create_instant_browse(as=as, ln=lang), ne_portalbox = self.create_portalbox(lang, 'ne') ) self.write_cache_file("body-as=%s-ln=%s" % (as, lang), body) ## third, write portalboxes: self.write_cache_file("portalbox-tp-ln=%s" % lang, self.create_portalbox(lang, "tp")) self.write_cache_file("portalbox-te-ln=%s" % lang, self.create_portalbox(lang, "te")) self.write_cache_file("portalbox-lt-ln=%s" % lang, self.create_portalbox(lang, "lt")) self.write_cache_file("portalbox-rt-ln=%s" % lang, self.create_portalbox(lang, "rt")) ## fourth, write 'last updated' information: self.write_cache_file("last-updated-ln=%s" % lang, convert_datestruct_to_dategui(time.localtime(), ln=lang)) return - def create_navtrail_links(self, as=CFG_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): + def create_navtrail_links(self, as=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): """Creates navigation trail links, i.e. links to collection ancestors (except Home collection). If as==1, then links to Advanced Search interfaces; otherwise Simple Search. """ dads = [] for dad in self.get_ancestors(): if dad.name != CFG_SITE_NAME: # exclude Home collection dads.append((dad.name, dad.get_name(ln))) return websearch_templates.tmpl_navtrail_links( as=as, ln=ln, dads=dads) def create_portalbox(self, lang=CFG_SITE_LANG, position="rt"): """Creates portalboxes of language CFG_SITE_LANG of the position POSITION by consulting DB configuration database. The position may be: 'lt'='left top', 'rt'='right top', etc.""" out = "" query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ " ORDER BY cp.score DESC" % (self.id, lang, position) res = run_sql(query) for row in res: title, body = row[0], row[1] if title: out += websearch_templates.tmpl_portalbox(title = title, body = body) else: # no title specified, so print body ``as is'' only: out += body return out - def create_narrowsearch(self, as=CFG_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG, type="r"): + def create_narrowsearch(self, as=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG, type="r"): """Creates list of collection descendants of type 'type' under title 'title'. If as==1, then links to Advanced Search interfaces; otherwise Simple Search. Suitable for 'Narrow search' and 'Focus on' boxes.""" # get list of sons and analyse it sons = self.get_sons(type) if not sons: return '' # get descendents descendants = self.get_descendants(type) grandsons = [] if CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS: # load grandsons for each son for son in sons: grandsons.append(son.get_sons()) # return "" return websearch_templates.tmpl_narrowsearch( as = as, ln = ln, type = type, father = self, has_grandchildren = len(descendants)>len(sons), sons = sons, display_grandsons = CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, grandsons = grandsons ) def create_external_collections_box(self, ln=CFG_SITE_LANG): external_collection_load_states() if not dico_collection_external_searches.has_key(self.id): return "" engines_list = external_collection_sort_engine_by_name(dico_collection_external_searches[self.id]) return websearch_templates.tmpl_searchalso(ln, engines_list, self.id) def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG): """ Create info about latest additions that will be used for create_instant_browse() later. """ self.latest_additions_info = [] if self.nbrecs and self.reclist: # firstly, get last 'rg' records: recIDs = list(self.reclist) # FIXME: temporary hack in order to display tweaked latest # additions box for some CERN collections: if CFG_CERN_SITE: this_year = time.strftime("%Y", time.localtime()) if self.name in ['CERN Yellow Reports']: last_year = str(int(this_year) - 1) # detect recIDs only from this and past year: recIDs = list(self.reclist & \ search_pattern(p='year:%s or year:%s' % \ (this_year, last_year))) elif self.name in ['Videos']: # detect recIDs only from this year: recIDs = list(self.reclist & \ search_pattern(p='year:%s' % this_year)) total = len(recIDs) to_display = min(rg, total) for idx in range(total-1, total-to_display-1, -1): recid = recIDs[idx] self.latest_additions_info.append({'id': recid, 'format': format_record(recid, "hb", ln=ln), 'date': get_creation_date(recid, fmt="%Y-%m-%d
%H:%i")}) return - def create_instant_browse(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, as=CFG_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): + def create_instant_browse(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, as=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): "Searches database and produces list of last 'rg' records." if self.restricted_p(): return websearch_templates.tmpl_box_restricted_content(ln = ln) if rg == 0: # do not show latest additions box return "" # FIXME: temporary hack in order not to display latest # additions box for some CERN collections: if CFG_CERN_SITE and self.name in ['Periodicals', 'Electronic Journals']: return "" try: self.latest_additions_info latest_additions_info_p = True except: latest_additions_info_p = False if latest_additions_info_p: passIDs = [] for idx in range(0, min(len(self.latest_additions_info), rg)): passIDs.append({'id': self.latest_additions_info[idx]['id'], 'body': self.latest_additions_info[idx]['format'] + \ websearch_templates.tmpl_record_links(recid=self.latest_additions_info[idx]['id'], ln=ln), 'date': self.latest_additions_info[idx]['date']}) if self.nbrecs > rg: url = websearch_templates.build_search_url( cc=self.name, jrec=rg+1, ln=ln, as=as) else: url = "" return websearch_templates.tmpl_instant_browse( as=as, ln=ln, recids=passIDs, more_link=url) return websearch_templates.tmpl_box_no_records(ln=ln) def create_searchoptions(self): "Produces 'Search options' portal box." box = "" query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id ORDER BY cff.score DESC""" % self.id res = run_sql(query) if res: for row in res: field_id = row[0] field_code = row[1] field_name = row[2] query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) res_bis = run_sql(query_bis) if res_bis: values = [{'value' : '', 'text' : 'any' + ' ' + field_name}] # FIXME: internationalisation of "any" for row_bis in res_bis: values.append({'value' : cgi.escape(row_bis[0], 1), 'text' : row_bis[1]}) box += websearch_templates.tmpl_select( fieldname = field_code, values = values ) return box def create_sortoptions(self, ln=CFG_SITE_LANG): """Produces 'Sort options' portal box.""" # load the right message language _ = gettext_set_language(ln) box = "" query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id values = [{'value' : '', 'text': "- %s -" % _("latest first")}] res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: for tmp in ('title', 'author', 'report number', 'year'): values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) box += websearch_templates.tmpl_select( fieldname = 'so', css_class = 'address', values = [ {'value' : 'a' , 'text' : _("asc.")}, {'value' : 'd' , 'text' : _("desc.")} ] ) return box def create_rankoptions(self, ln=CFG_SITE_LANG): "Produces 'Rank options' portal box." # load the right message language _ = gettext_set_language(ln) values = [{'value' : '', 'text': "- %s %s -" % (string.lower(_("OR")), _("rank by"))}] for (code, name) in get_bibrank_methods(self.id, ln): values.append({'value' : code, 'text': name}) box = websearch_templates.tmpl_select( fieldname = 'sf', css_class = 'address', values = values ) return box def create_displayoptions(self, ln=CFG_SITE_LANG): "Produces 'Display options' portal box." # load the right message language _ = gettext_set_language(ln) values = [] for i in ['10', '25', '50', '100', '250', '500']: values.append({'value' : i, 'text' : i + ' ' + _("results")}) box = websearch_templates.tmpl_select( fieldname = 'rg', css_class = 'address', values = values ) if self.get_sons(): box += websearch_templates.tmpl_select( fieldname = 'sc', css_class = 'address', values = [ {'value' : '1' , 'text' : _("split by collection")}, {'value' : '0' , 'text' : _("single list")} ] ) return box def create_formatoptions(self, ln=CFG_SITE_LANG): "Produces 'Output format options' portal box." # load the right message language _ = gettext_set_language(ln) box = "" values = [] query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf WHERE cf.id_collection=%d AND cf.id_format=f.id AND f.visibility='1' ORDER BY cf.score DESC, f.name ASC""" % self.id res = run_sql(query) if res: for row in res: values.append({'value' : row[0], 'text': row[1]}) else: values.append({'value' : 'hb', 'text' : "HTML %s" % _("brief")}) box = websearch_templates.tmpl_select( fieldname = 'of', css_class = 'address', values = values ) return box def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): """Produces 'search within' selection box for the current collection.""" # get values query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id ORDER BY cff.score DESC, f.name ASC""" % self.id res = run_sql(query) values = [{'value' : '', 'text' : get_field_i18nname("any field", ln)}] if res: for row in res: values.append({'value' : row[0], 'text' : row[1]}) else: if CFG_CERN_SITE: for tmp in ['title', 'author', 'abstract', 'report number', 'year']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) else: for tmp in ['title', 'author', 'abstract', 'keyword', 'report number', 'journal', 'year', 'fulltext', 'reference']: values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) return websearch_templates.tmpl_searchwithin_select( fieldname = fieldname, ln = ln, selected = value, values = values ) def create_searchexample(self): "Produces search example(s) for the current collection." out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id return out - def create_searchfor(self, as=CFG_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): + def create_searchfor(self, as=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): "Produces either Simple or Advanced 'Search for' box for the current collection." if as == 1: return self.create_searchfor_advanced(ln) elif as == 0: return self.create_searchfor_simple(ln) else: return self.create_searchfor_super_simple(ln) def create_searchfor_super_simple(self, ln=CFG_SITE_LANG): "Produces super simple 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_super_simple( ln=ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, example_search_queries=self.get_example_search_queries(), ) def create_searchfor_simple(self, ln=CFG_SITE_LANG): "Produces simple 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_simple( ln=ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, middle_option = self.create_searchwithin_selection_box(ln=ln), ) def create_searchfor_advanced(self, ln=CFG_SITE_LANG): "Produces advanced 'Search for' box for the current collection." return websearch_templates.tmpl_searchfor_advanced( ln = ln, collection_id = self.name, collection_name=self.get_name(ln=ln), record_count=self.nbrecs, middle_option_1 = self.create_searchwithin_selection_box('f1', ln=ln), middle_option_2 = self.create_searchwithin_selection_box('f2', ln=ln), middle_option_3 = self.create_searchwithin_selection_box('f3', ln=ln), searchoptions = self.create_searchoptions(), sortoptions = self.create_sortoptions(ln), rankoptions = self.create_rankoptions(ln), displayoptions = self.create_displayoptions(ln), formatoptions = self.create_formatoptions(ln) ) def calculate_reclist(self): """Calculate, set and return the (reclist, reclist_with_nonpublic_subcolls) tuple for given collection.""" if self.calculate_reclist_run_already: # do we have to recalculate? return (self.reclist, self.reclist_with_nonpublic_subcolls) write_message("... calculating reclist of %s" % self.name, verbose=6) reclist = HitSet() # will hold results for public sons only; good for storing into DB reclist_with_nonpublic_subcolls = HitSet() # will hold results for both public and nonpublic sons; good for deducing total # number of documents if not self.dbquery: # A - collection does not have dbquery, so query recursively all its sons # that are either non-restricted or that have the same restriction rules for coll in self.get_sons(): coll_reclist, coll_reclist_with_nonpublic_subcolls = coll.calculate_reclist() if ((coll.restricted_p() is None) or (coll.restricted_p() == self.restricted_p())): # add this reclist ``for real'' only if it is public reclist.union_update(coll_reclist) reclist_with_nonpublic_subcolls.union_update(coll_reclist_with_nonpublic_subcolls) else: # B - collection does have dbquery, so compute it: # (note: explicitly remove DELETED records) if CFG_CERN_SITE: reclist = search_pattern(None, self.dbquery + \ ' -collection:"DELETED" -collection:"DUMMY"') else: reclist = search_pattern(None, self.dbquery + ' -collection:"DELETED"') reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) # store the results: self.nbrecs = len(reclist_with_nonpublic_subcolls) self.reclist = reclist self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls # last but not least, update the speed-up flag: self.calculate_reclist_run_already = 1 # return the two sets: return (self.reclist, self.reclist_with_nonpublic_subcolls) def update_reclist(self): "Update the record universe for given collection; nbrecs, reclist of the collection table." if self.update_reclist_run_already: # do we have to reupdate? return 0 write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs), verbose=6) sys.stdout.flush() try: run_sql("UPDATE collection SET nbrecs=%s, reclist=%s WHERE id=%s", (self.nbrecs, self.reclist.fastdump(), self.id)) self.reclist_updated_since_start = 1 except Error, e: print "Database Query Error %d: %s." % (e.args[0], e.args[1]) sys.exit(1) # last but not least, update the speed-up flag: self.update_reclist_run_already = 1 return 0 def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def get_current_time_timestamp(): """Return timestamp corresponding to the current time.""" return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) def compare_timestamps_with_tolerance(timestamp1, timestamp2, tolerance=0): """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2 minus TOLERANCE, 0 if they are equal within TOLERANCE limit, and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE. """ # remove any trailing .00 in timestamps: timestamp1 = re.sub(r'\.[0-9]+$', '', timestamp1) timestamp2 = re.sub(r'\.[0-9]+$', '', timestamp2) # first convert timestamps to Unix epoch seconds: timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S")) timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S")) # now compare them: if timestamp1_seconds < timestamp2_seconds - tolerance: return -1 elif timestamp1_seconds > timestamp2_seconds + tolerance: return 1 else: return 0 def get_database_last_updated_timestamp(): """Return last updated timestamp for collection-related and record-related database tables. """ database_tables_timestamps = [] database_tables_timestamps.append(get_table_update_time('bibrec')) database_tables_timestamps.append(get_table_update_time('bibfmt')) database_tables_timestamps.append(get_table_update_time('idxWORD%')) database_tables_timestamps.append(get_table_update_time('collection%')) database_tables_timestamps.append(get_table_update_time('portalbox')) database_tables_timestamps.append(get_table_update_time('field%')) database_tables_timestamps.append(get_table_update_time('format%')) database_tables_timestamps.append(get_table_update_time('rnkMETHODNAME')) return max(database_tables_timestamps) def get_cache_last_updated_timestamp(): """Return last updated cache timestamp.""" try: f = open(cfg_cache_last_updated_timestamp_file, "r") except: return "1970-01-01 00:00:00" timestamp = f.read() f.close() return timestamp def set_cache_last_updated_timestamp(timestamp): """Set last updated cache timestamp to TIMESTAMP.""" try: f = open(cfg_cache_last_updated_timestamp_file, "w") except: pass f.write(timestamp) f.close() return timestamp def main(): """Main that construct all the bibtask.""" task_init(authorization_action="runwebcoll", authorization_msg="WebColl Task Submission", description="""Description: webcoll updates the collection cache (record universe for a given collection plus web page elements) based on invenio.conf and DB configuration parameters. If the collection name is passed as the second argument, it'll update this collection only. If the collection name is immediately followed by a plus sign, it will also update all its desdendants. The top-level collection name may be entered as the void string.\n""", help_specific_usage=" -c, --collection\t Update cache for the given " "collection only. [all]\n" " -f, --force\t\t Force update even if cache is up to date. [no]\n" " -p, --part\t\t Update only certain cache parts (1=reclist," " 2=webpage). [both]\n" " -l, --language\t Update pages in only certain language" " (e.g. fr). [all]\n", version=__revision__, specific_params=("c:fp:l:", [ "collection=", "force", "part=", "language=" ]), task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_submit_check_options_fnc=task_submit_check_options, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. eg: if key in ['-n', '--number']: self.options['number'] = value return True return False """ if key in ("-c", "--collection"): task_set_option("collection", value) elif key in ("-f", "--force"): task_set_option("force", 1) elif key in ("-p", "--part"): task_set_option("part", int(value)) elif key in ("-l", "--language"): task_set_option("language", value) else: return False return True def task_submit_check_options(): if task_has_option('collection'): coll = get_collection(task_get_option("collection")) if coll.id is None: raise StandardError, 'Collection %s does not exist' % coll.name return True def task_run_core(): """ Reimplement to add the body of the task.""" task_run_start_timestamp = get_current_time_timestamp() colls = [] # decide whether we need to run or not, by comparing last updated timestamps: write_message("Database timestamp is %s." % get_database_last_updated_timestamp(), verbose=3) write_message("Collection cache timestamp is %s." % get_cache_last_updated_timestamp(), verbose=3) if task_has_option("part"): write_message("Running cache update part %s only." % task_get_option("part"), verbose=3) if task_has_option("force") or \ compare_timestamps_with_tolerance(get_database_last_updated_timestamp(), get_cache_last_updated_timestamp(), cfg_cache_last_updated_timestamp_tolerance) >= 0: ## either forced update was requested or cache is not up to date, so recreate it: # firstly, decide which collections to do: if task_has_option("collection"): coll = get_collection(task_get_option("collection")) colls.append(coll) else: res = run_sql("SELECT name FROM collection ORDER BY id") for row in res: colls.append(get_collection(row[0])) # secondly, update collection reclist cache: if task_get_option('part', 1) == 1: i = 0 for coll in colls: i += 1 write_message("%s / reclist cache update" % coll.name) coll.calculate_reclist() task_sleep_now_if_required() coll.update_reclist() task_update_progress("Part 1/2: done %d/%d" % (i, len(colls))) task_sleep_now_if_required(can_stop_too=True) # thirdly, update collection webpage cache: if task_get_option("part", 2) == 2: i = 0 for coll in colls: i += 1 write_message("%s / webpage cache update" % coll.name) coll.update_webpage_cache() task_update_progress("Part 2/2: done %d/%d" % (i, len(colls))) task_sleep_now_if_required(can_stop_too=True) # finally update the cache last updated timestamp: # (but only when all collections were updated, not when only # some of them were forced-updated as per admin's demand) if not task_has_option("collection"): set_cache_last_updated_timestamp(task_run_start_timestamp) write_message("Collection cache timestamp is set to %s." % get_cache_last_updated_timestamp(), verbose=3) else: ## cache up to date, we don't have to run write_message("Collection cache is up to date, no need to run.") ## we are done: return True ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/websearch/lib/websearch_webinterface.py b/modules/websearch/lib/websearch_webinterface.py index a4f80d873..458f783bf 100644 --- a/modules/websearch/lib/websearch_webinterface.py +++ b/modules/websearch/lib/websearch_webinterface.py @@ -1,985 +1,985 @@ ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebSearch URL handler.""" __revision__ = "$Id$" import cgi import os import datetime from urllib import quote from mod_python import apache #maximum number of collaborating authors etc shown in GUI MAX_COLLAB_LIST = 10 MAX_KEYWORD_LIST = 10 MAX_VENUE_LIST = 10 #tag constants AUTHOR_TAG = "100__a" COAUTHOR_TAG = "700_a" AUTHOR_INST_TAG = "100__u" VENUE_TAG = "909C4p" KEYWORD_TAG = "6531_a" try: Set = set except NameError: from sets import Set from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_NAME, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_SITE_ADMIN_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_WEBSEARCH_INSTANT_BROWSE_RSS, \ CFG_WEBSEARCH_RSS_TTL, \ CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS, \ - CFG_DEFAULT_SEARCH_INTERFACE, \ - CFG_ENABLED_SEARCH_INTERFACES + CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ + CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES from invenio.dbquery import Error from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import redirect_to_url, make_canonical_urlargd, drop_default_urlargd, create_html_link from invenio.webuser import getUid, page_not_authorized, get_user_preferences, \ collect_user_info, http_check_credentials from invenio import search_engine from invenio.websubmit_webinterface import WebInterfaceFilesPages from invenio.webcomment_webinterface import WebInterfaceCommentsPages from invenio.webpage import page, create_error_box from invenio.messages import gettext_set_language from invenio.search_engine import get_colID, get_coll_i18nname, \ check_user_can_view_record, collection_restricted_p, restricted_collection_cache, \ get_fieldvalues from invenio.access_control_engine import acc_authorize_action from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.bibformat import format_records from invenio.bibformat_engine import get_output_formats from invenio.websearch_webcoll import mymkdir, get_collection from invenio.intbitset import intbitset from invenio.bibupload import find_record_from_sysno from invenio.bibrank_citation_searcher import get_author_cited_by, get_cited_by_list from invenio.bibrank_downloads_indexer import get_download_weight_total from invenio.search_engine_summarizer import summarize_records import invenio.template websearch_templates = invenio.template.load('websearch') search_results_default_urlargd = websearch_templates.search_results_default_urlargd search_interface_default_urlargd = websearch_templates.search_interface_default_urlargd try: output_formats = [output_format['attrs']['code'].lower() for output_format in \ get_output_formats(with_attributes=True).values()] except KeyError: output_formats = ['xd', 'xm', 'hd', 'hb', 'hs', 'hx'] output_formats.extend(['hm', 't', 'h']) def wash_search_urlargd(form): """ Create canonical search arguments from those passed via web form. """ argd = wash_urlargd(form, search_results_default_urlargd) # Sometimes, users pass ot=245,700 instead of # ot=245&ot=700. Normalize that. ots = [] for ot in argd['ot']: ots += ot.split(',') argd['ot'] = ots # We can either get the mode of function as # action=, or by setting action_browse or # action_search. if argd['action_browse']: argd['action'] = 'browse' elif argd['action_search']: argd['action'] = 'search' else: if argd['action'] not in ('browse', 'search'): argd['action'] = 'search' del argd['action_browse'] del argd['action_search'] return argd class WebInterfaceAuthorPages(WebInterfaceDirectory): """ Handle /author/Doe%2C+John etc set of pages.""" _exports = ['author'] def __init__(self, authorname=''): """Constructor.""" self.authorname = authorname def _lookup(self, component, path): """This handler parses dynamic URLs (/author/John+Doe).""" return WebInterfaceAuthorPages(component), path def __call__(self, req, form): """Serve the page in the given language.""" argd = wash_urlargd(form, {'ln': (str, CFG_SITE_LANG)}) ln = argd['ln'] req.argd = argd #needed since perform_req_search # start page req.content_type = "text/html" req.send_http_header() uid = getUid(req) search_engine.page_start(req, "hb", "", "", ln, uid) #wants to check it in case of no results self.authorname = self.authorname.replace("+"," ") if not self.authorname: return websearch_templates.tmpl_author_information(req, {}, self.authorname, 0, {}, {}, {}, {}, {}, ln) citelist = get_author_cited_by(self.authorname) #search the publications by this author pubs = search_engine.perform_request_search(req=req, p=self.authorname, f="author") #get most frequent first authors of these pubs authors = search_engine.get_most_popular_values_for_code(pubs, AUTHOR_TAG) #and affiliates collabs = search_engine.get_most_popular_values_for_code(pubs, COAUTHOR_TAG) #and publication venues venuedict = search_engine.get_values_for_code_dict(pubs, VENUE_TAG) #and keywords kwdict = search_engine.get_values_for_code_dict(pubs, KEYWORD_TAG) #construct a simple list of tuples that contains keywords that appear more than once #moreover, limit the length of the list to MAX_KEYWORD_LIST kwtuples = [] for k in kwdict.keys(): if kwdict[k] > 1: mytuple = (kwdict[k], k) kwtuples.append(mytuple) #sort .. kwtuples.sort() kwtuples.reverse() kwtuples = kwtuples[0:MAX_KEYWORD_LIST] #same for venues vtuples = [] for k in venuedict.keys(): if venuedict[k] > 1: mytuple = (venuedict[k], k) vtuples.append(mytuple) #sort .. vtuples.sort() vtuples.reverse() vtuples = vtuples[0:MAX_VENUE_LIST] authors.extend(collabs) #join #remove the author in question from authors: they are associates if (authors.count(self.authorname) > 0): authors.remove(self.authorname) authors = authors[0:MAX_COLLAB_LIST] #cut extra #a dict. keys: affiliations, values: lists of publications author_aff_pubs = self.get_institute_pub_dict(pubs) authoraffs = author_aff_pubs.keys() #find out how many times these records have been downloaded recsloads = {} recsloads = get_download_weight_total(recsloads, pubs) #sum up totaldownloads = 0 for k in recsloads.keys(): totaldownloads = totaldownloads + recsloads[k] #get cited by.. citedbylist = get_cited_by_list(pubs) #finally all stuff there, call the template websearch_templates.tmpl_author_information(req, pubs, self.authorname, totaldownloads, author_aff_pubs, citedbylist, kwtuples, authors, vtuples, ln) #cited-by summary out = summarize_records(intbitset(pubs), 'hcs', ln, self.authorname, 'author', req) req.write(out) simauthbox = search_engine.create_similarly_named_authors_link_box(self.authorname) req.write(simauthbox) return search_engine.page_end(req, 'hb', ln) def get_institute_pub_dict(self, recids): #return a dictionary consisting of institute -> list of publications affus = [] #list of insts from the record author_aff_pubs = {} #the disct to be build for recid in recids: #iterate all so that we get first author's intitute #if this the first author OR #"his" institute if he is an affliate author mainauthors = get_fieldvalues(recid, AUTHOR_TAG) mainauthor = " " if mainauthors: mainauthor = mainauthors[0] if (mainauthor == self.authorname): affus = get_fieldvalues(recid, AUTHOR_INST_TAG) #if this is empty, add a dummy " " value if (affus == []): affus = [" "] for a in affus: #add in author_aff_pubs if (author_aff_pubs.has_key(a)): tmp = author_aff_pubs[a] tmp.append(recid) author_aff_pubs[a] = tmp else: author_aff_pubs[a] = [recid] return author_aff_pubs index = __call__ class WebInterfaceRecordPages(WebInterfaceDirectory): """ Handling of a /record/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.export = self self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.citations = self self.export = WebInterfaceRecordExport(self.recid, self.format) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid argd['tab'] = self.tab if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : search_engine.guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg,\ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = search_engine.perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123 or /record/123/ index = __call__ class WebInterfaceRecordRestrictedPages(WebInterfaceDirectory): """ Handling of a /record-restricted/ URL fragment """ _exports = ['', 'files', 'reviews', 'comments', 'usage', 'references', 'export', 'citations'] #_exports.extend(output_formats) def __init__(self, recid, tab, format=None): self.recid = recid self.tab = tab self.format = format self.files = WebInterfaceFilesPages(self.recid) self.reviews = WebInterfaceCommentsPages(self.recid, reviews=1) self.comments = WebInterfaceCommentsPages(self.recid) self.usage = self self.references = self self.citations = self self.export = WebInterfaceRecordExport(self.recid, self.format) return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass record_primary_collection = search_engine.guess_primary_collection_of_a_record(self.recid) if collection_restricted_p(record_primary_collection): (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=record_primary_collection) if auth_code: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') # Keep all the arguments, they might be reused in the # record page itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = search_engine.perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123 or /record/123/ index = __call__ class WebInterfaceSearchResultsPages(WebInterfaceDirectory): """ Handling of the /search URL and its sub-pages. """ _exports = ['', 'authenticate', 'cache', 'log'] def __call__(self, req, form): """ Perform a search. """ argd = wash_search_urlargd(form) _ = gettext_set_language(argd['ln']) if req.method == 'POST': raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED uid = getUid(req) user_info = collect_user_info(req) if uid == -1: return page_not_authorized(req, "../", text = _("You are not authorized to view this area."), navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass involved_collections = Set() involved_collections.update(argd['c']) involved_collections.add(argd['cc']) if argd['id'] > 0: argd['recid'] = argd['id'] if argd['idb'] > 0: argd['recidb'] = argd['idb'] if argd['sysno']: tmp_recid = find_record_from_sysno(argd['sysno']) if tmp_recid: argd['recid'] = tmp_recid if argd['sysnb']: tmp_recid = find_record_from_sysno(argd['sysnb']) if tmp_recid: argd['recidb'] = tmp_recid if argd['recid'] > 0: if argd['recidb'] > argd['recid']: # Hack to check if among the restricted collections # at least a record of the range is there and # then if the user is not authorized for that # collection. recids = intbitset(xrange(argd['recid'], argd['recidb'])) restricted_colls = restricted_collection_cache.get_cache() for collname in restricted_colls: (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname) if auth_code: coll_recids = get_collection(collname).reclist if coll_recids & recids: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action' : cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target) else: involved_collections.add(search_engine.guess_primary_collection_of_a_record(argd['recid'])) # If any of the collection requires authentication, redirect # to the authentication form. for coll in involved_collections: if collection_restricted_p(coll): (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action' : cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target) # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd # mod_python does not like to return [] in case when of=id: out = search_engine.perform_request_search(req, **argd) if out == []: return str(out) else: return out def cache(self, req, form): """Search cache page.""" argd = wash_urlargd(form, {'action': (str, 'show')}) return search_engine.perform_request_cache(req, action=argd['action']) def log(self, req, form): """Search log page.""" argd = wash_urlargd(form, {'date': (str, '')}) return search_engine.perform_request_log(req, date=argd['date']) def authenticate(self, req, form): """Restricted search results pages.""" argd = wash_search_urlargd(form) user_info = collect_user_info(req) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): (auth_code, dummy) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action' : cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target) # Keep all the arguments, they might be reused in the # search_engine itself to derivate other queries req.argd = argd uid = getUid(req) if uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # mod_python does not like to return [] in case when of=id: out = search_engine.perform_request_search(req, **argd) if out == []: return str(out) else: return out # Parameters for the legacy URLs, of the form /?c=ALEPH legacy_collection_default_urlargd = { - 'as': (int, CFG_DEFAULT_SEARCH_INTERFACE), + 'as': (int, CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE), 'verbose': (int, 0), 'c': (str, CFG_SITE_NAME)} class WebInterfaceSearchInterfacePages(WebInterfaceDirectory): """ Handling of collection navigation.""" _exports = [('index.py', 'legacy_collection'), ('', 'legacy_collection'), ('search.py', 'legacy_search'), 'search', 'openurl', 'testsso'] search = WebInterfaceSearchResultsPages() def testsso(self, req, form): """ For testing single sign-on """ req.add_common_vars() sso_env = {} for var, value in req.subprocess_env.iteritems(): if var.startswith('HTTP_ADFS_'): sso_env[var] = value out = "SSO test</TITLE</HEAD>" out += "<BODY><TABLE>" for var, value in sso_env.iteritems(): out += "<TR><TD><STRONG>%s</STRONG></TD><TD>%s</TD></TR>" % (var, value) out += "</TABLE></BODY></HTML>" return out def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for collections and records)""" if component == 'collection': c = '/'.join(path) def answer(req, form): """Accessing collections cached pages.""" # Accessing collections: this is for accessing the # cached page on top of each collection. argd = wash_urlargd(form, search_interface_default_urlargd) # We simply return the cached page of the collection argd['c'] = c if not argd['c']: # collection argument not present; display # home collection by default argd['c'] = CFG_SITE_NAME return display_collection(req, **argd) return answer, [] elif component == 'record' or component == 'record-restricted': try: recid = int(path[0]) except IndexError: # display record #1 for URL /record without a number recid = 1 except ValueError: if path[0] == '': # display record #1 for URL /record/ without a number recid = 1 else: # display page not found for URLs like /record/foo return None, [] if recid <= 0: # display page not found for URLs like /record/-5 or /record/0 return None, [] format = None tab = '' try: if path[1] in ['', 'files', 'reviews', 'comments', 'usage', 'references', 'citations']: tab = path[1] elif path[1] == 'export': tab = '' format = path[2] # format = None # elif path[1] in output_formats: # tab = '' # format = path[1] else: # display page not found for URLs like /record/references # for a collection where 'references' tabs is not visible return None, [] except IndexError: # Keep normal url if tabs is not specified pass #if component == 'record-restricted': #return WebInterfaceRecordRestrictedPages(recid, tab, format), path[1:] #else: return WebInterfaceRecordPages(recid, tab, format), path[1:] return None, [] def openurl(self, req, form): """ OpenURL Handler.""" argd = wash_urlargd(form, websearch_templates.tmpl_openurl_accepted_args) ret_url = websearch_templates.tmpl_openurl2invenio(argd) if ret_url: return redirect_to_url(req, ret_url) else: return redirect_to_url(req, CFG_SITE_URL) def legacy_collection(self, req, form): """Collection URL backward compatibility handling.""" accepted_args = dict(legacy_collection_default_urlargd) accepted_args.update({'referer' : (str, ''), 'realm' : (str, '')}) argd = wash_urlargd(form, accepted_args) # Apache authentication stuff if argd['realm']: http_check_credentials(req, argd['realm']) return redirect_to_url(req, argd['referer'] or '%s/youraccount/youradminactivities' % CFG_SITE_SECURE_URL) del argd['referer'] del argd['realm'] # If we specify no collection, then we don't need to redirect # the user, so that accessing <http://yoursite/> returns the # default collection. if not form.has_key('c'): return display_collection(req, **argd) # make the collection an element of the path, and keep the # other query elements as is. If the collection is CFG_SITE_NAME, # however, redirect to the main URL. c = argd['c'] del argd['c'] if c == CFG_SITE_NAME: target = '/' else: target = '/collection/' + quote(c) target += make_canonical_urlargd(argd, legacy_collection_default_urlargd) return redirect_to_url(req, target) def legacy_search(self, req, form): """Search URL backward compatibility handling.""" argd = wash_search_urlargd(form) # We either jump into the generic search form, or the specific # /record/... display if a recid is requested if argd['recid'] != -1: target = '/record/%d' % argd['recid'] del argd['recid'] else: target = '/search' target += make_canonical_urlargd(argd, search_results_default_urlargd) return redirect_to_url(req, target) def display_collection(req, c, as, verbose, ln): """Display search interface page for collection c by looking in the collection cache.""" _ = gettext_set_language(ln) req.argd = drop_default_urlargd({'as': as, 'verbose': verbose, 'ln': ln}, search_interface_default_urlargd) # get user ID: try: uid = getUid(req) user_preferences = {} if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this collection", navmenuid='search') elif uid > 0: user_preferences = get_user_preferences(uid) except Error: return page(title=_("Internal Error"), body = create_error_box(req, verbose=verbose, ln=ln), description="%s - Internal Error" % CFG_SITE_NAME, keywords="%s, Internal Error" % CFG_SITE_NAME, language=ln, req=req, navmenuid='search') # start display: req.content_type = "text/html" req.send_http_header() # deduce collection id: colID = get_colID(c) if type(colID) is not int: page_body = '<p>' + (_("Sorry, collection %s does not seem to exist.") % ('<strong>' + str(c) + '</strong>')) + '</p>' page_body = '<p>' + (_("You may want to start browsing from %s.") % ('<a href="' + CFG_SITE_URL + '?ln=' + ln + '">' + get_coll_i18nname(CFG_SITE_NAME, ln) + '</a>')) + '</p>' return page(title=_("Collection %s Not Found") % cgi.escape(c), body=page_body, description=(CFG_SITE_NAME + ' - ' + _("Not found") + ': ' + cgi.escape(str(c))), keywords="%s" % CFG_SITE_NAME, uid=uid, language=ln, req=req, navmenuid='search') # display collection interface page: try: filedesc = open("%s/collections/%d/navtrail-as=%d-ln=%s.html" % (CFG_CACHEDIR, colID, as, ln), "r") c_navtrail = filedesc.read() filedesc.close() filedesc = open("%s/collections/%d/body-as=%d-ln=%s.html" % (CFG_CACHEDIR, colID, as, ln), "r") c_body = filedesc.read() filedesc.close() filedesc = open("%s/collections/%d/portalbox-tp-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r") c_portalbox_tp = filedesc.read() filedesc.close() filedesc = open("%s/collections/%d/portalbox-te-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r") c_portalbox_te = filedesc.read() filedesc.close() filedesc = open("%s/collections/%d/portalbox-lt-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r") c_portalbox_lt = filedesc.read() filedesc.close() # show help boxes (usually located in "tr", "top right") # if users have not banned them in their preferences: c_portalbox_rt = "" if user_preferences.get('websearch_helpbox', 1) > 0: filedesc = open("%s/collections/%d/portalbox-rt-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r") c_portalbox_rt = filedesc.read() filedesc.close() filedesc = open("%s/collections/%d/last-updated-ln=%s.html" % (CFG_CACHEDIR, colID, ln), "r") c_last_updated = filedesc.read() filedesc.close() title = get_coll_i18nname(c, ln) # if there is only one collection defined, do not print its # title on the page as it would be displayed repetitively. if len(search_engine.collection_reclist_cache.keys()) == 1: title = "" rssurl = CFG_SITE_URL + '/rss' if c != CFG_SITE_NAME: rssurl += '?cc=' + quote(c) return page(title=title, body=c_body, navtrail=c_navtrail, description="%s - %s" % (CFG_SITE_NAME, c), keywords="%s, %s" % (CFG_SITE_NAME, c), uid=uid, language=ln, req=req, cdspageboxlefttopadd=c_portalbox_lt, cdspageboxrighttopadd=c_portalbox_rt, titleprologue=c_portalbox_tp, titleepilogue=c_portalbox_te, lastupdated=c_last_updated, navmenuid='search', rssurl=rssurl, - show_title_p=-1 not in CFG_ENABLED_SEARCH_INTERFACES) + show_title_p=-1 not in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES) except: if verbose >= 9: req.write("<br />c=%s" % c) req.write("<br />as=%s" % as) req.write("<br />ln=%s" % ln) req.write("<br />colID=%s" % colID) req.write("<br />uid=%s" % uid) return page(title=_("Internal Error"), body = create_error_box(req, ln=ln), description="%s - Internal Error" % CFG_SITE_NAME, keywords="%s, Internal Error" % CFG_SITE_NAME, uid=uid, language=ln, req=req, navmenuid='search') return "\n" class WebInterfaceRSSFeedServicePages(WebInterfaceDirectory): """RSS 2.0 feed service pages.""" def __call__(self, req, form): """RSS 2.0 feed service.""" # Keep only interesting parameters for the search default_params = websearch_templates.rss_default_urlargd # We need to keep 'jrec' and 'rg' here in order to have # 'multi-page' RSS. These parameters are not kept be default # as we don't want to consider them when building RSS links # from search and browse pages. default_params.update({'jrec':(int, 1), 'rg': (int, CFG_WEBSEARCH_INSTANT_BROWSE_RSS)}) argd = wash_urlargd(form, default_params) for coll in argd['c'] + [argd['cc']]: if collection_restricted_p(coll): user_info = collect_user_info(req) (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=coll) if auth_code: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : coll}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action' : cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target) # Create a standard filename with these parameters current_url = websearch_templates.build_rss_url(argd) cache_filename = current_url.split('/')[-1] # In the same way as previously, add 'jrec' & 'rg' req.content_type = "application/rss+xml" req.send_http_header() try: # Try to read from cache path = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) # Check if cache needs refresh filedesc = open(path, "r") last_update_time = datetime.datetime.fromtimestamp(os.stat(os.path.abspath(path)).st_mtime) assert(datetime.datetime.now() < last_update_time + datetime.timedelta(minutes=CFG_WEBSEARCH_RSS_TTL)) c_rss = filedesc.read() filedesc.close() req.write(c_rss) return except Exception, e: # do it live and cache previous_url = None if argd['jrec'] > 1: prev_jrec = argd['jrec'] - argd['rg'] if prev_jrec < 1: prev_jrec = 1 previous_url = websearch_templates.build_rss_url(argd, jrec=prev_jrec) recIDs = search_engine.perform_request_search(req, of="id", c=argd['c'], cc=argd['cc'], p=argd['p'], f=argd['f'], p1=argd['p1'], f1=argd['f1'], m1=argd['m1'], op1=argd['op1'], p2=argd['p2'], f2=argd['f2'], m2=argd['m2'], op2=argd['op2'], p3=argd['p3'], f3=argd['f3'], m3=argd['m3']) next_url = None if len(recIDs) >= argd['jrec'] + argd['rg']: next_url = websearch_templates.build_rss_url(argd, jrec=(argd['jrec'] + argd['rg'])) recIDs = recIDs[-argd['jrec']:(-argd['rg']-argd['jrec']):-1] rss_prologue = '<?xml version="1.0" encoding="UTF-8"?>\n' + \ websearch_templates.tmpl_xml_rss_prologue(current_url=current_url, previous_url=previous_url, next_url=next_url) + '\n' req.write(rss_prologue) rss_body = format_records(recIDs, of='xr', record_separator="\n", req=req, epilogue="\n") rss_epilogue = websearch_templates.tmpl_xml_rss_epilogue() + '\n' req.write(rss_epilogue) # update cache dirname = "%s/rss" % (CFG_CACHEDIR) mymkdir(dirname) fullfilename = "%s/rss/%s.xml" % (CFG_CACHEDIR, cache_filename) try: # Remove the file just in case it already existed # so that a bit of space is created os.remove(fullfilename) except OSError: pass # Check if there's enough space to cache the request. if len(os.listdir(dirname)) < CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS: try: os.umask(022) f = open(fullfilename, "w") f.write(rss_prologue + rss_body + rss_epilogue) f.close() except IOError, v: if v[0] == 36: # URL was too long. Never mind, don't cache pass else: raise repr(v) index = __call__ class WebInterfaceRecordExport(WebInterfaceDirectory): """ Handling of a /record/<recid>/export/<format> URL fragment """ _exports = output_formats def __init__(self, recid, format=None): self.recid = recid self.format = format for output_format in output_formats: self.__dict__[output_format] = self return def __call__(self, req, form): argd = wash_search_urlargd(form) argd['recid'] = self.recid if self.format is not None: argd['of'] = self.format req.argd = argd uid = getUid(req) if uid == -1: return page_not_authorized(req, "../", text="You are not authorized to view this record.", navmenuid='search') elif uid > 0: pref = get_user_preferences(uid) try: if not form.has_key('rg'): # fetch user rg preference only if not overridden via URL argd['rg'] = int(pref['websearch_group_records']) except (KeyError, ValueError): pass # Check if the record belongs to a restricted primary # collection. If yes, redirect to the authenticated URL. user_info = collect_user_info(req) (auth_code, auth_msg) = check_user_can_view_record(user_info, self.recid) if auth_code and user_info['email'] == 'guest' and not user_info['apache_user']: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : search_engine.guess_primary_collection_of_a_record(self.recid)}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : argd['ln'], 'referer' : CFG_SITE_URL + req.unparsed_uri}, {}) return redirect_to_url(req, target) elif auth_code: return page_not_authorized(req, "../", \ text = auth_msg,\ navmenuid='search') # mod_python does not like to return [] in case when of=id: out = search_engine.perform_request_search(req, **argd) if out == []: return str(out) else: return out # Return the same page wether we ask for /record/123/export/xm or /record/123/export/xm/ index = __call__