diff --git a/config/invenio.conf b/config/invenio.conf index 0d9780f4e..b7d71f38e 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,1796 +1,1809 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of a Invenio installation, as coming out of the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs, but this is not advised. ## ## If you would like to customize some of these parameters, you should ## rather create a file named 'invenio-local.conf' in the same ## directory where 'invenio.conf' lives and you should write there ## only the customizations that you want to be different from the ## vanilla defaults. 
## ## Here is a realistic, minimalist, yet production-ready example of ## what you would typically put there: ## ## $ cat /opt/invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_NAME = John Doe's Document Server ## CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com ## CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com ## CFG_DATABASE_HOST = localhost ## CFG_DATABASE_NAME = invenio ## CFG_DATABASE_USER = invenio ## CFG_DATABASE_PASS = my123p$ss ## ## You should override at least the parameters mentioned above and the ## parameters mentioned in the `Part 1: Essential parameters' below in ## order to define some very essential runtime parameters such as the ## name of your document server (CFG_SITE_NAME and ## CFG_SITE_NAME_INTL_*), the visible URL of your document server ## (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email address of the ## local Invenio administrator, comment moderator, and alert engine ## (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_ADMIN_EMAIL, etc), and last but ## not least your database credentials (CFG_DATABASE_*). ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have specified in ## your local file. This cascading of configuration parameters will ## ease your future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local Invenio administrator. 
## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_PORT = 3306 CFG_DATABASE_NAME = invenio CFG_DATABASE_USER = invenio CFG_DATABASE_PASS = my123p$ss ## CFG_SITE_URL - specify URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not leave ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify secure URL under which your ## installation secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not leave ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your Invenio installation. CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.) 
CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Институт Фиктивных Наук Атлантиды CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 CFG_SITE_NAME_INTL_hu = Kitalált Tudományok Atlantiszi Intézete CFG_SITE_NAME_INTL_af = Atlantis Instituut van Fiktiewe Wetenskap CFG_SITE_NAME_INTL_gl = Instituto Atlantis de Ciencia Fictive CFG_SITE_NAME_INTL_ro = Institutul Atlantis al Ştiinţelor Fictive CFG_SITE_NAME_INTL_rw = Atlantis Ishuri Rikuru Ry'ubuhanga CFG_SITE_NAME_INTL_ka = ატლანტიდის ფიქტიური მეცნიერების ინსტიტუტი CFG_SITE_NAME_INTL_lt = Fiktyvių Mokslų Institutas Atlantis CFG_SITE_NAME_INTL_ar = معهد أطلنطيس للعلوم الافتراضية ## CFG_SITE_LANG -- the default language of the interface: ' CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. 
Currently supported languages ## include Afrikaans, Arabic, Bulgarian, Catalan, Czech, German, Georgian, ## Greek, English, Spanish, French, Croatian, Hungarian, Galician, ## Italian, Japanese, Kinyarwanda, Lithuanian, Norwegian, Polish, ## Portuguese, Romanian, Russian, Slovak, Swedish, Ukrainian, Chinese ## (China), Chinese (Taiwan), so that the eventual maximum you can ## currently select is ## "af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW". CFG_SITE_LANGS = af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = info@invenio-software.org ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and login with ## this address when using Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = info@invenio-software.org ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES -- list of email addresses to ## which an email should be sent in case of emergency (e.g. bibsched ## queue has been stopped because of an error). Configuration ## dictionary allows for different recipients based on weekday and ## time-of-day. Example: ## ## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = { ## 'Sunday 22:00-06:00': '0041761111111@email2sms.foo.com', ## '06:00-18:00': 'team-in-europe@foo.com,0041762222222@email2sms.foo.com', ## '18:00-06:00': 'team-in-usa@foo.com', ## '*': 'john.doe.phone@foo.com'} ## ## If you want the emergency email notifications to always go to the ## same address, just use the wildcard line in the above example. CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {} ## CFG_SITE_ADMIN_EMAIL_EXCEPTIONS -- set this to 0 if you do not want ## to receive any captured exception via email to CFG_SITE_ADMIN_EMAIL ## address. 
Captured exceptions will still be available in ## var/log/invenio.err file. Set this to 1 if you want to receive ## some of the captured exceptions (this depends on the actual place ## where the exception is captured). Set this to 2 if you want to ## receive all captured exceptions. CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1 ## CFG_SITE_RECORD -- what is the URI part representing detailed ## record pages? We recommend leaving the default value `record' ## unchanged. CFG_SITE_RECORD = record ## CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER -- set this to ## the number of seconds after which to reset the exception notification ## counter. A given repetitive exception is notified via email with a ## logarithmic strategy: the first time it is seen it is sent via email, ## then the second time, then the fourth, then the eighth and so forth. ## If the number of seconds elapsed since the last time it was notified ## is greater than CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER ## then the internal counter is reset in order not to have exception ## notification become more and more rare. CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400 ## CFG_CERN_SITE -- do we want to enable CERN-specific code? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_INSPIRE_SITE = 0 ## CFG_ADS_SITE -- do we want to enable ADS-specific code? ## Put "1" for "yes" and "0" for "no". CFG_ADS_SITE = 0 ## CFG_OPENAIRE_SITE -- do we want to enable OpenAIRE-specific code? ## Put "1" for "yes" and "0" for "no". CFG_OPENAIRE_SITE = 0 ## CFG_DEVEL_SITE -- is this a development site? If it is, you might ## prefer that it does not do certain things. For example, you might ## not want WebSubmit to send certain emails or trigger certain ## processes on a development site. 
## Put "1" for "yes" (this is a development site) or "0" for "no" ## (this isn't a development site.) CFG_DEVEL_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important one is ## the 'template skin' you would like to use and the obfuscation mode ## for your email addresses. Please refer to the WebStyle Admin Guide ## for more explanation. The other variables are listed here mostly ## for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE. How do we "protect" ## email addresses from undesired automated email harvesters? This ## setting will not affect 'support' and 'admin' emails. ## NOTE: there is no ultimate solution to protect against email ## harvesting. All have drawbacks and can more or less be ## circumvented. Choose your preferred mode ([t] means "transparent" ## for the user): ## -1: hide all emails. ## [t] 0 : no protection, email returned as is. ## foo@example.com => foo@example.com ## 1 : basic email munging: replaces @ by [at] and . by [dot] ## foo@example.com => foo [at] example [dot] com ## [t] 2 : transparent name mangling: characters are replaced by ## equivalent HTML entities. ## foo@example.com => &#102;&#111;&#111;&#64;&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109; ## [t] 3 : javascript insertion. Requires Javascript enabled on client ## side. ## 4 : replaces @ and . characters by gif equivalents. ## foo@example.com => foo<img src="at.gif" alt=" [at] ">example<img src="dot.gif" alt=" [dot] ">com CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2 ## CFG_WEBSTYLE_INSPECT_TEMPLATES -- Do we want to debug all template ## functions so that they would return HTML results wrapped in ## comments indicating which part of HTML page was created by which ## template function? Useful only for debugging Pythonic HTML ## template. 
See WebStyle Admin Guide for more information. CFG_WEBSTYLE_INSPECT_TEMPLATES = 0 ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML ## left top box: CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global ## HTML left bottom box: CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global ## HTML right top box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global ## HTML right bottom box: CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ## CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST -- when certain HTTP status ## codes are raised to the WSGI handler, the corresponding exceptions ## and error messages can be sent to the system administrator for ## inspecting. This is useful to detect and correct errors. The ## variable represents a comma-separated list of HTTP statuses that ## should alert admin. Wildcards are possible. If the status is ## followed by an "r", it means that a referer is required to exist ## (useful to distinguish broken known links from URL typos when 404 ## errors are raised). CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = 404r,400,5*,41* ## CFG_WEBSTYLE_HTTP_USE_COMPRESSION -- whether to enable deflate ## compression of your HTTP/HTTPS connections. This will affect the Apache ## configuration snippets created by inveniocfg --create-apache-conf and ## the OAI-PMH Identify response. CFG_WEBSTYLE_HTTP_USE_COMPRESSION = 0 ## CFG_WEBSTYLE_REVERSE_PROXY_IPS -- if you are setting a multinode ## environment where an HTTP proxy such as mod_proxy is sitting in ## front of the Invenio web application and is forwarding requests to ## worker nodes, set here the list of IP addresses of the allowed ## HTTP proxies. This is needed in order to avoid IP address spoofing ## when worker nodes are also available on the public Internet and ## might receive forged HTTP requests. 
Only HTTP requests coming from ## the specified IP addresses will be considered as forwarded from a ## reverse proxy. E.g. set this to '123.123.123.123'. CFG_WEBSTYLE_REVERSE_PROXY_IPS = ################################## ## Part 3: WebSearch parameters ## ################################## ## This section contains some configuration parameters for WebSearch ## module. Please note that WebSearch is mostly configured on ## run-time via its WebSearch Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. (Note that you may modify them ## afterwards too, though.) ## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries we want to ## cache in memory per one Apache httpd process? This cache is used ## mainly for "next/previous page" functionality, but it caches also ## "popular" user queries if more than one user happen to search for ## the same thing. Note that large numbers may lead to great memory ## consumption. We recommend a value not greater than 100. CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 0 ## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older ## system, you may want to map field codes of your old system (such as ## 'ti') to Invenio/MySQL ("title"). Use Python dictionary syntax ## for the translation table, e.g. {'wau':'author', 'wti':'title'}. ## Usually you don't want to do that, and you would use empty dict {}. CFG_WEBSEARCH_FIELDS_CONVERT = {} ## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the light search interface, in ## characters. CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60 ## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search ## pattern window in the simple search interface, in characters. 
CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40 ## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the advanced search interface, in ## characters. CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30 ## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still ## want to sort? For higher numbers we print only a warning and won't ## perform any sorting other than default 'latest records first', as ## sorting would be very time consuming then. We recommend a value of ## not more than a couple of thousands. CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000 ## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but ## it was not preformatted in the "HTML brief" format, do we want to ## call BibFormatting on the fly? Put "1" for "yes" and "0" for "no". ## Note that "1" will display the record exactly as if it were fully ## preformatted, but it may be slow due to on-the-fly processing; "0" ## will display a default format very fast, but it may not have all ## the fields as in the fully preformatted HTML brief format. Note ## also that this option is active only for old (PHP) formats; the new ## (Python) formats are called on the fly by default anyway, since ## they are much faster. When unsure, please set "0" here. CFG_WEBSEARCH_CALL_BIBFORMAT = 0 ## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs ## visible rather than MySQL's record IDs? You may use this if you ## migrate from a different e-doc system, and you store your old ## system numbers into 970__a. Put "1" for "yes" and "0" for ## "no". Usually you don't want to do that, though. CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0 ## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- Put "1" if you want the ## "Latest Additions" in the web collection pages to show ## internationalized records. Useful only if your brief BibFormat ## templates contains internationalized strings. Otherwise put "0" in ## order not to slow down the creation of latest additions by WebColl. 
CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0 ## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display ## under 'Latest Additions' in the web collection pages. CFG_WEBSEARCH_INSTANT_BROWSE = 10 ## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to ## display in the RSS feed. CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25 ## CFG_WEBSEARCH_RSS_I18N_COLLECTIONS -- comma-separated list of ## collections that feature an internationalized RSS feed on their ## main search interface page created by webcoll. Other collections ## will have RSS feed using CFG_SITE_LANG. CFG_WEBSEARCH_RSS_I18N_COLLECTIONS = ## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long ## a feed cache is valid. CFG_WEBSEARCH_RSS_TTL = 360 ## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of requests kept ## in cache. If the cache is filled, following requests are not cached. CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000 ## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names ## to print explicitly; for more print "et al". Note that this is ## used in default formatting that is seldom used, as usually ## BibFormat defines all the format. The value below is only used ## when BibFormat fails, for example. CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3 ## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show or ## not collection grandsons in Narrow Search boxes (sons are shown by ## default, grandsons are configurable here). Use 0 for no and 1 for ## yes. CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1 ## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we ## create help links for Ellis, Nick or Ellis, Nicholas and friends ## when Ellis, N was searched for? Useful if you have one author ## stored in the database under several name formats, namely surname ## comma firstname and surname comma initial cataloging policy. Use 0 ## for no and 1 for yes. 
CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1 ## CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS -- MathJax is a JavaScript ## library that renders (La)TeX mathematical formulas in the client ## browser. This parameter must contain a comma-separated list of ## output formats for which to apply the MathJax rendering, for example ## "hb,hd". If the list is empty, MathJax is disabled. CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS = ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching ## external collections (e.g. SPIRES, CiteSeer, etc), how many seconds ## do we wait for reply before abandoning? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5 ## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many ## results do we fetch? CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10 ## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search ## results by collection or not? Use 0 for not, 1 for yes. CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1 ## CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS -- the default number of ## records to display per page in the search results pages. CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS = 10 ## CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS -- in order to limit denial of ## service attacks the total number of records per group displayed as a ## result of a search query will be limited to this number. Only the superuser ## queries will not be affected by this limit. CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200 ## CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL -- logged in users ## might have rights to access some restricted collections. This variable ## tweaks the kind of support the system will automatically provide to the ## user with respect to searching into these restricted collections. ## Set this to 0 in order to have the user to explicitly activate restricted ## collections in order to search into them. 
Set this to 1 in order to ## propose to the user the list of restricted collections to which he/she has ## rights (note: this is not yet implemented). Set this to 2 in order to ## silently add all the restricted collections to which the user has rights ## to any query. ## Note: the system will discover which restricted collections a user has ## rights to, at login time. The time complexity of this procedure is ## proportional to the number of restricted collections. E.g. for a system ## with ~50 restricted collections, you might expect ~1s of delay in the ## login time, when this variable is set to a value higher than 0. CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL = 0 ## CFG_WEBSEARCH_SHOW_COMMENT_COUNT -- do we want to show the 'N comments' ## links on the search engine pages? (useful only when you have allowed ## commenting) CFG_WEBSEARCH_SHOW_COMMENT_COUNT = 1 ## CFG_WEBSEARCH_SHOW_REVIEW_COUNT -- do we want to show the 'N reviews' ## links on the search engine pages? (useful only when you have allowed ## reviewing) CFG_WEBSEARCH_SHOW_REVIEW_COUNT = 1 ## CFG_WEBSEARCH_FULLTEXT_SNIPPETS -- how many full-text snippets to ## display for full-text searches? CFG_WEBSEARCH_FULLTEXT_SNIPPETS = 4 ## CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS -- how many context words ## to display around the pattern in the snippet? CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS = 4 ## CFG_WEBSEARCH_WILDCARD_LIMIT -- some of the queries, wildcard ## queries in particular (ex: cern*, a*), but also regular expressions ## (ex: [a-z]+), may take a long time to respond due to the high ## number of hits. You can limit the number of terms matched by a ## wildcard by setting this variable. A negative value or zero means ## that none of the queries will be limited (which may be wanted but is ## also prone to denial-of-service kind of attacks). 
CFG_WEBSEARCH_WILDCARD_LIMIT = 50000 ## CFG_WEBSEARCH_SYNONYM_KBRS -- defines which knowledge bases are to ## be used for which index in order to provide runtime synonym lookup ## of user-supplied terms, and what massaging function should be used ## upon search pattern before performing the KB lookup. (Can be one ## of `exact', 'leading_to_comma', `leading_to_number'.) CFG_WEBSEARCH_SYNONYM_KBRS = { 'journal': ['SEARCH-SYNONYM-JOURNAL', 'leading_to_number'], } ## CFG_SOLR_URL -- optionally, you may use Solr to serve full-text ## queries. If so, please specify the URL of your Solr instance. ## (example: http://localhost:8080/solr) CFG_SOLR_URL = ## CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT -- specify the limit when ## the previous/next/back hit links are to be displayed on detailed record pages. ## In order to speed up list manipulations, if a search returns lots of hits, ## more than this limit, then do not lose time calculating next/previous/back ## hits at all, but display page directly without these. ## Note also that Invenio installations that do not like ## to have the next/previous hit link functionality would be able to set this ## variable to zero and not see anything. CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT = 1000 ## CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY -- when a record belongs to more than one ## restricted collection, if the viewrestrcoll policy is set to "ALL" (default) ## then the user must be authorized to all the restricted collections, in ## order to be granted access to the specific record. If the policy is set to ## "ANY", then the user needs to be authorized to only one of the collections ## in order to be granted access to the specific record. CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY = ALL ## CFG_WEBSEARCH_SPIRES_SYNTAX -- variable to configure the use of the ## SPIRES query syntax in searches. 
Values: 0 = SPIRES syntax is ## switched off; 1 = leading 'find' is required; 9 = leading 'find' is ## not required (leading SPIRES operator, space-operator-space, etc ## are also accepted). CFG_WEBSEARCH_SPIRES_SYNTAX = 1 ####################################### ## Part 4: BibHarvest OAI parameters ## ####################################### ## This part defines parameters for the Invenio OAI gateway. ## Useful if you are running Invenio as OAI data provider. ## CFG_OAI_ID_FIELD -- OAI identifier MARC field: CFG_OAI_ID_FIELD = 909COo ## CFG_OAI_SET_FIELD -- OAI set MARC field: CFG_OAI_SET_FIELD = 909COp ## CFG_OAI_PREVIOUS_SET_FIELD -- previous OAI set MARC field: CFG_OAI_PREVIOUS_SET_FIELD = 909COq ## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy ## (no/transient/persistent): CFG_OAI_DELETED_POLICY = persistent ## CFG_OAI_ID_PREFIX -- OAI identifier prefix: CFG_OAI_ID_PREFIX = atlantis.cern.ch ## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier: CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:123 ## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb: CFG_OAI_IDENTIFY_DESCRIPTION = <description> <eprints xmlns="http://www.openarchives.org/OAI/1.1/eprints" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd"> <content> <URL>http://atlantis.cern.ch/</URL> </content> <metadataPolicy> <text>Free and unlimited use by anybody with obligation to refer to original record</text> </metadataPolicy> <dataPolicy> <text>Full content, i.e. preprints may not be harvested by robots</text> </dataPolicy> <submissionPolicy> <text>Submission restricted. 
Submitted documents are subject of approval by OAI repository admins.</text> </submissionPolicy> </eprints> </description> ## CFG_OAI_LOAD -- OAI number of records in a response: CFG_OAI_LOAD = 500 ## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time: CFG_OAI_EXPIRE = 90000 ## CFG_OAI_SLEEP -- service unavailable between two consecutive ## requests for CFG_OAI_SLEEP seconds: CFG_OAI_SLEEP = 2 ## CFG_OAI_METADATA_FORMATS -- mapping between accepted metadataPrefixes and ## the corresponding output format to use, its schema and its metadataNamespace. CFG_OAI_METADATA_FORMATS = { 'marcxml': ('XOAIMARC', 'http://www.openarchives.org/OAI/1.1/dc.xsd', 'http://purl.org/dc/elements/1.1/'), 'oai_dc': ('XOAIDC', 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd', 'http://www.loc.gov/MARC21/slim'), } ## CFG_OAI_FRIENDS -- list of OAI baseURL of friend repositories. See: ## <http://www.openarchives.org/OAI/2.0/guidelines-friends.htm> CFG_OAI_FRIENDS = http://cdsweb.cern.ch/oai2d,http://openaire.cern.ch/oai2d,http://export.arxiv.org/oai2 ## The following subfields are a complement to ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. 
If CFG_OAI_PROVENANCE_BASEURL_SUBFIELD is ## set for a record, then the corresponding field is considered as being ## harvested via OAI-PMH ## CFG_OAI_PROVENANCE_BASEURL_SUBFIELD -- baseURL of the originDescription or a ## record CFG_OAI_PROVENANCE_BASEURL_SUBFIELD = u ## CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD -- datestamp of the originDescription ## or a record CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD = d ## CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD -- metadataNamespace of the ## originDescription or a record CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD = m ## CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD -- originDescription of the ## originDescription or a record CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD = d ## CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD -- harvestDate of the ## originDescription or a record CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD = h ## CFG_OAI_PROVENANCE_ALTERED_SUBFIELD -- altered flag of the ## originDescription or a record CFG_OAI_PROVENANCE_ALTERED_SUBFIELD = t ## NOTE: the following parameters are experimental ## ----------------------------------------------------------------------------- ## CFG_OAI_RIGHTS_FIELD -- MARC field dedicated to storing Copyright information CFG_OAI_RIGHTS_FIELD = 542__ ## CFG_OAI_RIGHTS_HOLDER_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright holder information CFG_OAI_RIGHTS_HOLDER_SUBFIELD = d ## CFG_OAI_RIGHTS_DATE_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright date information CFG_OAI_RIGHTS_DATE_SUBFIELD = g ## CFG_OAI_RIGHTS_URI_SUBFIELD -- MARC subfield dedicated to storing the URI ## (URL or URN, more detailed statement about copyright status) information CFG_OAI_RIGHTS_URI_SUBFIELD = u ## CFG_OAI_RIGHTS_CONTACT_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright holder contact information CFG_OAI_RIGHTS_CONTACT_SUBFIELD = e ## CFG_OAI_RIGHTS_STATEMENT_SUBFIELD -- MARC subfield dedicated to storing the ## Copyright statement as presented on the resource 
CFG_OAI_RIGHTS_STATEMENT_SUBFIELD = f ## CFG_OAI_LICENSE_FIELD -- MARC field dedicated to storing terms governing ## use and reproduction (license) CFG_OAI_LICENSE_FIELD = 540__ ## CFG_OAI_LICENSE_TERMS_SUBFIELD -- MARC subfield dedicated to storing the ## Terms governing use and reproduction, e.g. CC License CFG_OAI_LICENSE_TERMS_SUBFIELD = a ## CFG_OAI_LICENSE_PUBLISHER_SUBFIELD -- MARC subfield dedicated to storing the ## person or institution imposing the license (author, publisher) CFG_OAI_LICENSE_PUBLISHER_SUBFIELD = b ## CFG_OAI_LICENSE_URI_SUBFIELD -- MARC subfield dedicated to storing the URI ## URI CFG_OAI_LICENSE_URI_SUBFIELD = u ##------------------------------------------------------------------------------ ################################## ## Part 5: WebSubmit parameters ## ################################## ## This section contains some configuration parameters for WebSubmit ## module. Please note that WebSubmit is mostly configured on ## run-time via its WebSubmit Admin web interface. The parameters ## below are the ones that you do not probably want to modify during ## the runtime. ## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext ## documents are stored under "/opt/invenio/var/data/files/gX/Y" ## directories where X is 0,1,... and Y stands for bibdoc ID. Thusly ## documents Y are grouped into directories X and this variable ## indicates the maximum number of documents Y stored in each ## directory X. This limit is imposed solely for filesystem ## performance reasons in order not to have too many subdirectories in ## a given directory. CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000 ## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated ## list of document extensions not listed in Python standard mimetype ## library that should be recognized by Invenio. 
CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm ## CFG_BIBDOCFILE_USE_XSENDFILE -- if your web server supports ## XSendfile header, you may want to enable this feature in order for ## Invenio to tell the web server to stream files for download (after ## proper authorization checks) by web server's means. This helps to ## liberate Invenio worker processes from being busy with sending big ## files to clients. The web server will take care of that. Note: ## this feature is still somewhat experimental. Note: when enabled ## (set to 1), then you have to also regenerate Apache vhost conf ## snippets (inveniocfg --update-config-py --create-apache-conf). CFG_BIBDOCFILE_USE_XSENDFILE = 0 ## CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY -- a number between 0 and ## 1 that indicates probability with which MD5 checksum will be ## verified when streaming bibdocfile-managed files. (0.1 will cause ## the check to be performed once for every 10 downloads) CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY = 0.1 ## CFG_OPENOFFICE_SERVER_HOST -- the host where an OpenOffice Server is ## listening to. If localhost an OpenOffice server will be started ## automatically if it is not already running. ## Note: if you set this to an empty value this will disable the usage of ## OpenOffice for converting documents. ## If you set this to something different than localhost you'll have to take ## care to have an OpenOffice server running on the corresponding host and ## to install the same OpenOffice release both on the client and on the server ## side. ## In order to launch an OpenOffice server on a remote machine, just start ## the usual 'soffice' executable in this way: ## $> soffice -headless -nologo -nodefault -norestore -nofirststartwizard \ ## ..
-accept=socket,host=HOST,port=PORT;urp;StarOffice.ComponentContext CFG_OPENOFFICE_SERVER_HOST = localhost ## CFG_OPENOFFICE_SERVER_PORT -- the port where an OpenOffice Server is ## listening to. CFG_OPENOFFICE_SERVER_PORT = 2002 ## CFG_OPENOFFICE_USER -- the user that will be used to launch the OpenOffice ## client. It is recommended to set this to a user who don't own files, like ## e.g. 'nobody'. You should also authorize your Apache server user to be ## able to become this user, e.g. by adding to your /etc/sudoers the following ## line: ## "apache ALL=(nobody) NOPASSWD: ALL" ## provided that apache is the username corresponding to the Apache user. ## On some machine this might be apache2 or www-data. CFG_OPENOFFICE_USER = nobody ################################# ## Part 6: BibIndex parameters ## ################################# ## This section contains some configuration parameters for BibIndex ## module. Please note that BibIndex is mostly configured on run-time ## via its BibIndex Admin web interface. The parameters below are the ## ones that you do not probably want to modify very often during the ## runtime. ## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do ## you want to index locally stored files only, or also external URLs? ## Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 1 ## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove ## stopwords? Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_REMOVE_STOPWORDS = 0 ## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as ## alphanumeric separators of word-blocks inside words. You probably ## don't want to change this. CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ ## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation ## between word-blocks inside words. You probably don't want to ## change this. 
CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\" ## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup ## before indexing? Use 1 if you have HTML markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0 ## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LATEX markup ## before indexing? Use 1 if you have LATEX markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0 ## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to ## index. The terms smaller than this amount will be discarded. ## Useful to keep the database clean, however you can safely leave ## this value on 0 for up to 1,000,000 documents. CFG_BIBINDEX_MIN_WORD_LENGTH = 0 ## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD -- ## access credentials to access restricted URLs, interesting only if ## you are fulltext-indexing files located on a remote server that is ## only available via username/password. But it's probably better to ## handle this case via IP or some convention; the current scheme is ## mostly there for demo only. CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass ## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- ## Enable sanity checks for integers passed to the intbitset data ## structures. It is good to enable this during debugging ## and to disable this value for speed improvements. CFG_INTBITSET_ENABLE_SANITY_CHECKS = False ## CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES -- regular expression that matches ## docnames for which OCR is desired (set this to .* in order to enable ## OCR in general, set this to empty in order to disable it.)
CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES = scan-.* ## CFG_BIBINDEX_SPLASH_PAGES -- key-value mapping where the key corresponds ## to a regular expression that matches the URLs of the splash pages of ## a given service and the value is a regular expression of the set of URLs ## referenced via <a> tags in the HTML content of the splash pages that are ## referring to documents that need to be indexed. ## NOTE: for backward compatibility reasons you can set this to a simple ## regular expression that will directly be used as the unique key of the ## map, with corresponding value set to ".*" (in order to match any URL) CFG_BIBINDEX_SPLASH_PAGES = { "http://documents\.cern\.ch/setlink\?.*": ".*", "http://ilcagenda\.linearcollider\.org/subContributionDisplay\.py\?.*|http://ilcagenda\.linearcollider\.org/contributionDisplay\.py\?.*": "http://ilcagenda\.linearcollider\.org/getFile\.py/access\?.*|http://ilcagenda\.linearcollider\.org/materialDisplay\.py\?.*", } ## CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES -- do we want ## the author word index to exclude first names to keep only last ## names? If set to True, then for the author `Bernard, Denis', only ## `Bernard' will be indexed in the word index, not `Denis'. Note ## that if you change this variable, you have to re-index the author ## index via `bibindex -w author -R'. CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES = False ## CFG_BIBINDEX_SYNONYM_KBRS -- defines which knowledge bases are to ## be used for which index in order to provide index-time synonym ## lookup, and what massaging function should be used upon search ## pattern before performing the KB lookup. (Can be one of `exact', ## 'leading_to_comma', `leading_to_number'.) 
CFG_BIBINDEX_SYNONYM_KBRS = { 'global': ['INDEX-SYNONYM-TITLE', 'exact'], 'title': ['INDEX-SYNONYM-TITLE', 'exact'], } ####################################### ## Part 7: Access control parameters ## ####################################### ## This section contains some configuration parameters for the access ## control system. Please note that WebAccess is mostly configured on ## run-time via its WebAccess Admin web interface. The parameters ## below are the ones that you probably do not want to modify very ## often during the runtime. (If you do want to modify them during ## runtime, for example to deny access temporarily because of backups, ## you can edit access_control_config.py directly, no need to get back ## here and no need to redo the make process.) ## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is. ## Use 0 for normal operation of the site, 1 for read-only site (all ## write operations temporarily closed), 2 for site fully closed, ## 3 for also disabling any database connection. ## Useful for site maintenance. CFG_ACCESS_CONTROL_LEVEL_SITE = 0 ## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy. Use ## 0 to allow guest users, 1 not to allow them (all users must login). CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0 ## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and ## activation policy. When 0, users can register and accounts are ## automatically activated. When 1, users can register but admin must ## activate the accounts. When 2, users cannot register nor update ## their email address, only admin can register accounts. When 3, ## users cannot register nor update email address nor password, only ## admin can register accounts. When 4, the same as 3 applies, plus the ## user cannot change his login method. When 5, then the same as 4 ## applies, plus info about how to get an account is hidden from the ## login page.
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account ## registration to certain email addresses? If wanted, give domain ## name below, e.g. "cern.ch". If not wanted, leave it empty. CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a ## notification email to the administrator when a new account is ## created? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a ## notification email to the user when a new account is created in order ## to verify the validity of the provided email address? Use ## 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a ## notification email to the user when a new account is activated? ## Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a ## notification email to the user when a new account is deleted or ## account demand rejected? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0 ## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials ## are stored. Must be an absolute pathname. If the value does not ## start by a slash, it is considered to be the filename of a file ## located under prefix/var/tmp directory. This is useful for the ## demo site testing purposes. For the production site, if you plan ## to restrict access to some collections based on the Apache user ## authentication mechanism, you should put here an absolute path to ## your Apache password file. CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords ## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are ## defined. See the documentation of the preceding config variable.
CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups ################################### ## Part 8: WebSession parameters ## ################################### ## This section contains some configuration parameters for tweaking ## session handling. ## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a session ## and the corresponding cookie is considered expired. CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2 ## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which a session ## and the corresponding cookie is considered expired, when the user has ## requested to permanently stay logged in. CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365 ## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when user requested ## a password reset, for how many days is the URL valid? CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account ## activation email was sent, for how many days is the URL valid? CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3 ## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- when ## user won't confirm his email address and not complete ## registration, after how many days will it expire? CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10 ## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the session ## system allocates the same uid=0 to all guest users regardless of where they ## come from. 0 allocate a unique uid to each guest. CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0 ## CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS -- to prevent session cookie ## stealing, Invenio checks that the IP address of a connection is the ## same as that of the connection which created the initial session. ## This variable lets you decide how many bits should be skipped during ## this check. Set this to 0 in order to enable full IP address ## checking. Set this to 32 in order to disable IP address checking.
## Intermediate values (say 8) let you have some degree of security so ## that you can trust your local network only while helping to solve ## issues related to outside clients that configured their browser to ## use a web proxy for HTTP connection but not for HTTPS, thus ## potentially having two different IP addresses. In general, if you use ## HTTPS in order to serve authenticated content, you can safely set ## CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS to 32. CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS = 0 ################################ ## Part 9: BibRank parameters ## ################################ ## This section contains some configuration parameters for the ranking ## system. ## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading ## similarity stats? ('People who viewed this page also viewed') CFG_BIBRANK_SHOW_READING_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download ## similarity stats? ('People who downloaded this document also ## downloaded') CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show download ## history graph? (0=no | 1=classic/gnuplot | 2=flot) CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1 ## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we ## want to show a graph representing the distribution of client IPs ## downloading given document? (0=no | 1=classic/gnuplot | 2=flot) CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0 ## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited ## by' links? (useful only when you have citations in the metadata) CFG_BIBRANK_SHOW_CITATION_LINKS = 1 ## CFG_BIBRANK_SHOW_CITATION_STATS -- do we want to show citation ## stats? ('Cited by M records', 'Co-cited with N records') CFG_BIBRANK_SHOW_CITATION_STATS = 1 ## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show citation ## history graph?
(0=no | 1=classic/gnuplot | 2=flot) CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1 #################################### ## Part 10: WebComment parameters ## #################################### ## This section contains some configuration parameters for the ## commenting and reviewing facilities. ## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users write ## public comments on records? CFG_WEBCOMMENT_ALLOW_COMMENTS = 1 ## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users write ## public reviews of records? CFG_WEBCOMMENT_ALLOW_REVIEWS = 1 ## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short ## reviews, that is just the attribution of stars without submitting ## detailed review text? CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0 ## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users ## report a comment to be abusive, how many they have to be before the ## site admin is alerted? CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5 ## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do ## we display in the detailed record page upon welcome? CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1 ## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site ## admin after every comment? CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple comment submissions by a user? CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many ## elapsed seconds do we consider enough when checking for possible ## multiple review submissions by a user? 
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20 ## CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG ## Javascript-based editor when user edits comments? CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False ## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = info@invenio-software.org ## CFG_WEBCOMMENT_DEFAULT_MODERATOR -- if no rules are ## specified to indicate who is the comment moderator of ## a collection, this person will be used as default CFG_WEBCOMMENT_DEFAULT_MODERATOR = info@invenio-software.org ## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS -- do we want to allow the use ## of MathJax plugin to render latex input in comments? CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS = 1 ## CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION -- allow comment author to ## delete its own comment? CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION = 1 # CFG_WEBCOMMENT_EMAIL_REPLIES_TO -- which field of the record define # email addresses that should be notified of newly submitted comments, # and for which collection. Use collection names as keys, and list of # tags as values CFG_WEBCOMMENT_EMAIL_REPLIES_TO = { 'Articles': ['506__d', '506__m'], } # CFG_WEBCOMMENT_RESTRICTION_DATAFIELD -- which field of the record # define the restriction (must be linked to WebAccess # 'viewrestrcomment') to apply to newly submitted comments, and for # which collection. Use collection names as keys, and one tag as value CFG_WEBCOMMENT_RESTRICTION_DATAFIELD = { 'Articles': '5061_a', 'Pictures': '5061_a', 'Theses': '5061_a', } # CFG_WEBCOMMENT_ROUND_DATAFIELD -- which field of the record define # the current round of comment for which collection. Use collection # name as key, and one tag as value CFG_WEBCOMMENT_ROUND_DATAFIELD = { 'Articles': '562__c', 'Pictures': '562__c', } # CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE -- max file size per attached # file, in bytes.
Choose 0 if you don't want to limit the size CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE = 5242880 # CFG_WEBCOMMENT_MAX_ATTACHED_FILES -- maximum number of files that can # be attached per comment. Choose 0 if you don't want to limit the # number of files. File uploads can be restricted with action # "attachcommentfile". CFG_WEBCOMMENT_MAX_ATTACHED_FILES = 5 # CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH -- how many levels of # indentation discussions can be. This can be used to ensure that # discussions will not go into deep levels of nesting if users don't # understand the difference between "reply to comment" and "add # comment". When the depth is reached, any "reply to comment" is # conceptually converted to a "reply to thread" (i.e. reply to this # parent's comment). Use -1 for no limit, 0 for unthreaded (flat) # discussions. CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH = 1 ################################## ## Part 11: BibSched parameters ## ################################## ## This section contains some configuration parameters for the ## bibliographic task scheduler. ## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh ## bibsched monitor? (in seconds) CFG_BIBSCHED_REFRESHTIME = 5 ## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task ## logs? CFG_BIBSCHED_LOG_PAGER = /bin/more ## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to perform the ## garbage collector of BibSched queue (i.e. removing/moving task to archive). CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30 ## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTask that can be safely ## removed from the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc ## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be safely ## archived out of the BibSched queue once they are DONE.
CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oaiarchive ## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of BibTasks ## that can run concurrently. ## NOTE: concurrent tasks are still considered as an experimental ## feature. Please keep this value set to 1 on production environments. CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1 ## CFG_BIBSCHED_PROCESS_USER -- bibsched and bibtask processes must ## usually run under the same identity as the Apache web server ## process in order to share proper file read/write privileges. If ## you want to force some other bibsched/bibtask user, e.g. because ## you are using a local `invenio' user that belongs to your ## `www-data' Apache user group and so shares writing rights with your ## Apache web server process in this way, then please set its username ## identity here. Otherwise we shall check whether your ## bibsched/bibtask processes are run under the same identity as your ## Apache web server process (in which case you can leave the default ## empty value here). CFG_BIBSCHED_PROCESS_USER = ## CFG_BIBSCHED_NODE_TASKS -- specific nodes may be configured to ## run only specific tasks; if you want this, then this variable is a ## dictionary of the form {'hostname1': ['task1', 'task2']}. The ## default is that any node can run any task. CFG_BIBSCHED_NODE_TASKS = {} ################################### ## Part 12: WebBasket parameters ## ################################### ## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit for ## a maximum number of displayed baskets CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20 ## CFG_WEBBASKET_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG ## Javascript-based editor when user edits comments in WebBasket? CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False ################################## ## Part 13: WebAlert parameters ## ################################## ## This section contains some configuration parameters for the ## automatic email notification alert system. 
## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBALERT_ALERT_ENGINE_EMAIL = info@invenio-software.org ## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records ## at most do we send in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20 ## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of ## chars per line in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72 ## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert ## emails fails, how many times we retry? CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3 ## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending ## alert emails fails, what is the sleeptime between tries? (in ## seconds) CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300 #################################### ## Part 14: WebMessage parameters ## #################################### ## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large web messages do we ## allow? CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000 ## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages for a ## regular user do we allow in its inbox? CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30 ## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- how many days before ## we delete orphaned messages? CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60 ################################## ## Part 15: MiscUtil parameters ## ################################## ## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool ## in the DB engine of Invenio. It is okay to enable this flag ## even if you have not installed SQLAlchemy. Note that Invenio will ## lose some performance if this option is enabled. CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False ## CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT -- how many queries can we run ## inside run_sql_many() in one SQL statement? The limit value ## depends on MySQL's max_allowed_packet configuration.
CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT = 10000 ## CFG_MISCUTIL_SMTP_HOST -- which server to use as outgoing mail server to ## send outgoing emails generated by the system, for example concerning ## submissions or email notification alerts. CFG_MISCUTIL_SMTP_HOST = localhost ## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server ## defined in the previous step. CFG_MISCUTIL_SMTP_PORT = 25 ## CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT -- the default number of seconds after ## which a process launched through shellutils.run_process_with_timeout will ## be killed. This is useful to catch runaway processes. CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT = 300 ## CFG_MATHJAX_HOSTING -- if you plan to use MathJax to display TeX ## formulas on HTML web pages, you can specify whether you wish to use ## 'local' hosting or 'cdn' hosting of MathJax libraries. (If set to ## 'local', you have to run 'make install-mathjax-plugin' as described ## in the INSTALL guide.) If set to 'local', users will use your site ## to download MathJax sources. If set to 'cdn', users will use ## centralized MathJax CDN servers instead. Please note that using ## CDN is suitable only for small institutions or for MathJax ## sponsors; see the MathJax website for more details. (Also, please ## note that if you plan to use MathJax on your site, you have to ## adapt CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and ## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS configuration variables ## elsewhere in this file.) CFG_MATHJAX_HOSTING = local ################################# ## Part 16: BibEdit parameters ## ################################# ## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is ## locked to prevent other users to edit it at the same time. ## How many seconds of inactivity before the locked record again will be free ## for other people to edit?
CFG_BIBEDIT_TIMEOUT = 3600 ## CFG_BIBEDIT_LOCKLEVEL -- when a user tries to edit a record which there ## is a pending bibupload task for in the queue, this shouldn't be permitted. ## The lock level determines how thoroughly the queue should be investigated ## to determine if this is the case. ## Level 0 - always permits editing, doesn't look at the queue ## (unsafe, use only if you know what you are doing) ## Level 1 - permits editing if there are no queued bibedit tasks for this record ## (safe with respect to bibedit, but not for other bibupload maintenance jobs) ## Level 2 - permits editing if there are no queued bibupload tasks of any sort ## (safe, but may lock more than necessary if many cataloguers around) ## Level 3 - permits editing if no queued bibupload task concerns given record ## (safe, most precise locking, but slow, ## checks for 001/EXTERNAL_SYSNO_TAG/EXTERNAL_OAIID_TAG) ## The recommended level is 3 (default) or 2 (if you use maintenance jobs often). CFG_BIBEDIT_LOCKLEVEL = 3 ## CFG_BIBEDIT_PROTECTED_FIELDS -- a comma-separated list of fields that BibEdit ## will not allow to be added, edited or deleted. Wildcards are not supported, ## but conceptually a wildcard is added at the end of every field specification. ## Examples: ## 500A - protect all MARC fields with tag 500 and first indicator A ## 5 - protect all MARC fields in the 500-series. ## 909C_a - protect subfield a in tag 909 with first indicator C and empty ## second indicator ## Note that 001 is protected by default, but if protection of other ## identifiers or automated fields is a requirement, they should be added to ## this list. CFG_BIBEDIT_PROTECTED_FIELDS = ## CFG_BIBEDIT_QUEUE_CHECK_METHOD -- how do we want to check for ## possible queue locking situations to prevent cataloguers from ## editing a record that may be waiting in the queue?
Use 'bibrecord' ## for exact checking (always works, but may be slow), use 'regexp' ## for regular expression based checking (very fast, but may be ## inaccurate). When unsure, use 'bibrecord'. CFG_BIBEDIT_QUEUE_CHECK_METHOD = bibrecord ## CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE -- a dictionary ## containing which collections will be extended with a given template ## while being displayed in BibEdit UI. CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE = { 'Poetry' : 'poem'} ## CFG_BIBEDIT_KB_SUBJECTS - Name of the KB used in the field 65017a ## to automatically convert codes into extended version. e.g ## a - Astrophysics CFG_BIBEDIT_KB_SUBJECTS = Subjects ## CFG_BIBEDIT_KB_INSTITUTIONS - Name of the KB used for institution ## autocomplete. To be applied in fields defined in ## CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS CFG_BIBEDIT_KB_INSTITUTIONS = InstitutionsCollection ## CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS - list of fields to ## be autocompleted with the KB CFG_BIBEDIT_KB_INSTITUTIONS CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS = 100__u,700__u,701__u,502__c ## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING -- maximum number of records ## that can be modified instantly using the multi-record editor. Above ## this limit, modifications will only be executed in limited hours. CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING = 2000 ## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING -- maximum number of records ## that can be sent for modification without having a superadmin role. ## If the number of records is between CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING ## and this number, the modifications will take place only in limited hours. CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING = 20000 ## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME -- Allowed time to ## execute modifications on records, when the number exceeds ## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING.
CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME = 22:00-05:00 ################################### ## Part 17: BibUpload parameters ## ################################### ## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references? CFG_BIBUPLOAD_REFERENCE_TAG = 999 ## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external ## system numbers? Useful for matching when our records come from an ## external digital library system. CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store OAI ID tags ## of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI ID stored in this tag (kind of like ## external system number too). CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG -- where do we store OAI SRC ## tags of harvested records? Useful for matching when we harvest stuff ## via OAI that we do not want to reexport via Invenio OAI; so records ## may have only the source OAI SRC stored in this tag (kind of like ## external system number too). Note that the field should be the same as ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = 035__9 ## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that ## are strong enough to resist the replace mode. Useful for tags that ## might be created from an external non-metadata-like source, ## e.g. the information about the number of copies left. CFG_BIBUPLOAD_STRONG_TAGS = 964 ## CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -- a comma-separated list ## of tags that contain provenance information that should be checked ## in the bibupload correct mode via matching provenance codes. (Only ## field instances of the same provenance information would be acted ## upon.) Please specify the whole tag info up to subfield codes.
CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = 6531_9 ## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of system ## paths from which it is allowed to take fulltextes that will be uploaded via ## FFT (CFG_TMPDIR is included by default). CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home ## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS -- a dictionary containing external ## URLs that can be accessed by Invenio and specific HTTP headers that will be ## used for each URL. ## The keys of the dictionary are regular expressions matching a set of URLs, ## the values are dictionaries of headers as consumed by urllib2.Request. If a ## regular expression matching all URLs is created at the end of the list, it ## means that Invenio will download all URLs. Otherwise Invenio will just ## download authorized URLs. ## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [ ## ('http://myurl.com/.*', {'User-Agent': 'Me'}), ## ('http://yoururl.com/.*', {'User-Agent': 'You', 'Accept': 'text/plain'}), ## ('http://.*', {'User-Agent': 'Invenio'}), ## ] CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [ ('http(s)?://.*', {'User-Agent': 'Invenio'}), ] ## CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE -- do we want to serialize ## internal representation of records (Pythonic record structure) into ## the database? This can improve internal processing speed of some ## operations at the price of somewhat bigger disk space usage. ## If you change this value after some records have already been added ## to your installation, you may want to run: ## $ /opt/invenio/bin/inveniocfg --reset-recstruct-cache ## in order to either erase the cache thus freeing database space, ## or to fill the cache for all records that have not been cached yet. CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1 ## CFG_BIBUPLOAD_DELETE_FORMATS -- which formats do we want bibupload ## to delete when a record is ingested? Enter comma-separated list of ## formats. 
For example, 'hb,hd' will delete pre-formatted HTML brief ## and detailed formats from cache, so that search engine will ## generate them on-the-fly. Useful to always present latest data of ## records upon record display, until the periodical bibreformat job ## runs next and updates the cache. CFG_BIBUPLOAD_DELETE_FORMATS = hb ## CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY -- a comma-separated list ## indicating which fields match the file names of the documents to be ## uploaded. ## The matching will be done in the same order as the list provided. CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY = reportnumber,recid ## CFG_BATCHUPLOADER_DAEMON_DIR -- Directory where the batchuploader daemon ## will look for the subfolders metadata and document by default. ## If path is relative, CFG_PREFIX will be joined as a prefix CFG_BATCHUPLOADER_DAEMON_DIR = var/batchupload ## CFG_BATCHUPLOADER_WEB_ROBOT_AGENT -- Comma-separated list to specify the ## agents permitted when calling batch uploader web interface ## cdsweb.cern.ch/batchuploader/robotupload ## if using a curl, eg: curl xxx -A invenio_webupload CFG_BATCHUPLOADER_WEB_ROBOT_AGENT = invenio_webupload ## CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS -- Access list specifying for each ## IP address, which collections are allowed using batch uploader robot ## interface. CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = { '10.0.0.1': ['BOOK', 'REPORT'], # Example 1 '10.0.0.2': ['POETRY', 'PREPRINT'], # Example 2 } #################################### ## Part 18: BibCatalog parameters ## #################################### ## CFG_BIBCATALOG_SYSTEM -- set desired catalog system. For example, RT.
CFG_BIBCATALOG_SYSTEM = ## RT CONFIGURATION ## CFG_BIBCATALOG_SYSTEM_RT_CLI -- path to the RT CLI client CFG_BIBCATALOG_SYSTEM_RT_CLI = /usr/bin/rt ## CFG_BIBCATALOG_SYSTEM_RT_URL -- Base URL of the remote RT system CFG_BIBCATALOG_SYSTEM_RT_URL = http://localhost/rt3 ## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER -- Set the username for a default RT account ## on remote system, with limited privileges, in order to only create and modify own tickets. CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER = ## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD -- Set the password for the default RT account ## on remote system. CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD = #################################### ## Part 19: BibFormat parameters ## #################################### ## CFG_BIBFORMAT_HIDDEN_TAGS -- comma-separated list of MARC tags that ## are not shown to users not having cataloging authorizations. CFG_BIBFORMAT_HIDDEN_TAGS = 595 ## CFG_BIBFORMAT_ADDTHIS_ID -- if you want to use the AddThis service from ## <http://www.addthis.com/>, set this value to the pubid parameter as ## provided by the service (e.g. ra-4ff80aae118f4dad), and add a call to ## <BFE_ADDTHIS /> formatting element in your formats, for example ## Default_HTML_detailed.bft. CFG_BIBFORMAT_ADDTHIS_ID = +## CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS -- For each output +## format BibReformat currently creates a cache for only one language +## (CFG_SITE_LANG) per record. This means that visitors having set a +## different language than CFG_SITE_LANG will be served an on-the-fly +## output using the language of their choice. You can disable this +## behaviour by specifying below for which output format you would +## like to force the cache to be used whatever language is +## requested. If your format templates do not provide +## internationalization, you can optimize your site by setting for +## eg. 
hb,hd to always serve the precached output (if it exists) in +## the CFG_SITE_LANG +CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS = + #################################### ## Part 20: BibMatch parameters ## #################################### ## CFG_BIBMATCH_LOCAL_SLEEPTIME -- Determines the amount of seconds to sleep ## between search queries on LOCAL system. CFG_BIBMATCH_LOCAL_SLEEPTIME = 0.0 ## CFG_BIBMATCH_REMOTE_SLEEPTIME -- Determines the amount of seconds to sleep ## between search queries on REMOTE systems. CFG_BIBMATCH_REMOTE_SLEEPTIME = 2.0 ## CFG_BIBMATCH_FUZZY_WORDLIMITS -- Determines the amount of words to extract ## from a certain fields value during fuzzy matching mode. Add/change field ## and appropriate number to the dictionary to configure this. CFG_BIBMATCH_FUZZY_WORDLIMITS = { '100__a': 2, '245__a': 4 } ## CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT -- Determines the amount of empty results ## to accept during fuzzy matching mode. CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT = 1 ## CFG_BIBMATCH_QUERY_TEMPLATES -- Here you can set the various predefined querystrings ## used to standardize common matching queries. By default the following templates ## are given: ## title - standard title search. Taken from 245__a (default) ## title-author - title and author search (i.e. this is a title AND author a) ## Taken from 245__a and 100__a ## reportnumber - reportnumber search (i.e. reportnumber:REP-NO-123). CFG_BIBMATCH_QUERY_TEMPLATES = { 'title' : '[title]', 'title-author' : '[title] [author]', 'reportnumber' : 'reportnumber:[reportnumber]' } ## CFG_BIBMATCH_MATCH_VALIDATION_RULESETS -- Here you can define the various rulesets for ## validating search results done by BibMatch. Each ruleset contains a certain pattern mapped ## to a tuple defining a "matching-strategy". ## ## The rule-definitions must come in two parts: ## ## * The first part is a string containing a regular expression ## that is matched against the textmarc representation of each record. 
## If a match is found, the final rule-set is updated with ## the given "sub rule-set", where identical tag rules are replaced. ## ## * The second item is a list of key->value mappings (dict) that indicates specific ## strategy parameters with corresponding validation rules. ## ## This strategy consists of five items: ## ## * MARC TAGS: ## These MARC tags represent the fields taken from original record and any records from search ## results. When several MARC tags are specified with a given match-strategy, all the fields ## associated with these tags are matched together (i.e. with key "100__a,700__a", all 100__a ## and 700__a fields are matched together. Which is useful when first-author can vary for ## certain records on different systems). ## ## * COMPARISON THRESHOLD: ## a value between 0.0 and 1.0 specifying the threshold for string matches ## to determine if it is a match or not (using normalized string-distance). ## Normally 0.8 (80% match) is considered to be a close match. ## ## * COMPARISON MODE: ## the parse mode decides how the record datafields are compared: ## - 'strict' : all (sub-)fields are compared, and all must match. Order is significant. ## - 'normal' : all (sub-)fields are compared, and all must match. Order is ignored. ## - 'lazy' : all (sub-)fields are compared with each other and at least one must match ## - 'ignored': the tag is ignored in the match. Used to disable previously defined rules. ## ## * MATCHING MODE: ## the comparison mode decides how the fieldvalues are matched: ## - 'title' : uses a method specialized for comparing titles, e.g. looking for subtitles ## - 'author' : uses a special authorname comparison. Will take initials into account. ## - 'identifier' : special matching for identifiers, stripping away punctuation ## - 'date': matches dates by extracting and comparing the year ## - 'normal': normal string comparison. ## Note: Fields are considered matching when all its subfields or values match.
## ## * RESULT MODE: ## the result mode decides how the results from the comparisons are handled further: ## - 'normal' : a failed match will cause the validation to immediately exit as a failure. ## a successful match will cause the validation to continue on other rules (if any) ## - 'final' : a failed match will cause the validation to immediately exit as a failure. ## a successful match will cause validation to immediately exit as a success. ## - 'joker' : a failed match will cause the validation to continue on other rules (if any). ## a successful match will cause validation to immediately exit as a success. ## ## You can add your own rulesets in the dictionary below. The 'default' ruleset is always applied, ## and should therefore NOT be removed, but can be changed. The tag-rules can also be overwritten ## by other rulesets. ## ## WARNING: Beware that the validation quality is only as good as given rules, so matching results ## are never guaranteed to be accurate, as it is very content-specific. 
CFG_BIBMATCH_MATCH_VALIDATION_RULESETS = [('default', [{ 'tags' : '245__%,242__%', 'threshold' : 0.8, 'compare_mode' : 'lazy', 'match_mode' : 'title', 'result_mode' : 'normal' }, { 'tags' : '037__a,088__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'final' }, { 'tags' : '100__a,700__a', 'threshold' : 0.8, 'compare_mode' : 'normal', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '773__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'title', 'result_mode' : 'normal' }]), ('980__ \$\$a(THESIS|Thesis)', [{ 'tags' : '100__a', 'threshold' : 0.8, 'compare_mode' : 'strict', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '700__a,701__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '100__a,700__a', 'threshold' : 0.8, 'compare_mode' : 'ignored', 'match_mode' : 'author', 'result_mode' : 'normal' }]), ('260__', [{ 'tags' : '260__c', 'threshold' : 0.8, 'compare_mode' : 'lazy', 'match_mode' : 'date', 'result_mode' : 'normal' }]), ('0247_', [{ 'tags' : '0247_a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'final' }]), ('020__', [{ 'tags' : '020__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'joker' }]) ] ## CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT -- Determines the minimum percentage of the ## amount of rules to be positively matched when comparing two records. Should the number ## of matches be lower than required matches but equal to or above this limit, ## the match will be considered fuzzy. CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT = 0.65 ## CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT -- Determines the maximum amount of search results ## a single search can return before acting as a non-match. 
CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT = 15 ###################################### ## Part 21: BibAuthorID parameters ## ###################################### # CFG_BIBAUTHORID_MAX_PROCESSES is the max number of processes # that may be spawned by the disambiguation algorithm CFG_BIBAUTHORID_MAX_PROCESSES = 12 # CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS is the max number of threads # to parallelize sql queries during personID tables updates CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS = 12 # CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA is the minimum confidence needed # when backtracking automatically disambiguated authors to persons. # Values in [0,1] CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA = 0.5 # CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA is the threshold for # the confidence in a paper by the disambiguation algorithm to have it # automatically connected to a personID. Papers below the thresholds are # left disconnected from persons if not already connected in other ways. # values in [0,1] CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA = 0.5 # CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH minimum threshold for # disambiguated authors and persons: if less compatible than this the update # process will create a new person to associate to the found disambiguated author. CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH = 0.5 # CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N is a fallback mechanism # to force a merge if a certain percentage of papers is compatible no matter # what the confidences on the automatically disambiguated author looks like CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N = 0.5 # CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY defines the user info # keys for externally claimed records in a remote-login scenario--e.g. from arXiv.org # e.g.
"external_arxivids" for arXiv SSO CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY = # CFG_BIBAUTHORID_ATTACH_VA_TO_MULTIPLE_RAS determines if the authorid # algorithm is allowed to attach a virtual author to multiple # real authors in the last run of the orphan processing. # Comma separated list of values. CFG_BIBAUTHORID_ATTACH_VA_TO_MULTIPLE_RAS = False # CFG_BIBAUTHORID_ENABLED # Globally enable AuthorID Interfaces. # If False: No guest, user or operator will have access to the system. CFG_BIBAUTHORID_ENABLED = True # CFG_BIBAUTHORID_ON_AUTHORPAGES # Enable AuthorID information on the author pages. CFG_BIBAUTHORID_ON_AUTHORPAGES = True # CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL defines the eMail address # all ticket requests concerning authors will be sent to. CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL = info@invenio-software.org #CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE defines if the optional arXiv stub page is skipped CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = False ###################################### ## Part 22: BibClassify parameters ## ###################################### # CFG_BIBCLASSIFY_WEB_MAXKW -- maximum number of keywords to display # in the Keywords tab web page. CFG_BIBCLASSIFY_WEB_MAXKW = 100 ######################################## ## Part 23: Plotextractor parameters ## ######################################## ## CFG_PLOTEXTRACTOR_SOURCE_BASE_URL -- for acquiring source tarballs for plot ## extraction, where should we look?
If nothing is set, we'll just go ## to arXiv, but this can be a filesystem location, too CFG_PLOTEXTRACTOR_SOURCE_BASE_URL = http://arxiv.org/ ## CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER -- for acquiring source tarballs for plot ## extraction, subfolder where the tarballs sit CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER = e-print/ ## CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER -- for acquiring source tarballs for plot ## extraction, subfolder where the pdf sit CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER = pdf/ ## CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT -- a float representing the number of seconds ## to wait between each download of pdf and/or tarball from source URL. CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT = 2.0 ## CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of characters in each direction to extract ## context from. Default 750. CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT = 750 ## CFG_PLOTEXTRACTOR_DISALLOWED_TEX -- when extracting context of plots from TeX ## sources, this is the list of TeX tags that will trigger 'end of context'. CFG_PLOTEXTRACTOR_DISALLOWED_TEX = begin,end,section,includegraphics,caption,acknowledgements ## CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of words in each direction. Default 75. CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT = 75 ## CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT -- when extracting context of plots from ## TeX sources, this is the limitation of sentences in each direction. Default 2. CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT = 2 ###################################### ## Part 24: WebStat parameters ## ###################################### # CFG_WEBSTAT_BIBCIRCULATION_START_YEAR defines the start date of the BibCirculation # statistics. Value should have the format 'yyyy'. If empty, take all existing data.
CFG_WEBSTAT_BIBCIRCULATION_START_YEAR = #################################### ## Part 25: BibSort parameters ## #################################### ## CFG_BIBSORT_BUCKETS -- the number of buckets bibsort should use. ## If 0, then no buckets will be used (bibsort will be inactive). ## If different from 0, bibsort will be used for sorting the records. ## The number of buckets should be set with regards to the size ## of the repository; having a larger number of buckets will increase ## the sorting performance for the top results but will decrease ## the performance for sorting the middle results. ## We recommend to use 1 in case you have less than about ## 1,000,000 records. ## When modifying this variable, re-run rebalancing for all the bibsort ## methods, for having the database in synch. CFG_BIBSORT_BUCKETS = 1 ########################## ## THAT's ALL, FOLKS! ## ########################## diff --git a/modules/bibformat/lib/bibformat.py b/modules/bibformat/lib/bibformat.py index dc13f30ac..6f6552d96 100644 --- a/modules/bibformat/lib/bibformat.py +++ b/modules/bibformat/lib/bibformat.py @@ -1,583 +1,583 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Format records using chosen format.
The main APIs are: - format_record - format_records - create_excel - get_output_format_content_type This module wraps the BibFormat engine and its associated functions. This is also where special formatting functions of multiple records (that the engine does not handle, as it works on a single record basis) should be defined, with name C{def create_*}. @see: bibformat_utils.py """ __revision__ = "$Id$" import zlib from invenio import bibformat_dblayer from invenio import bibformat_engine from invenio import bibformat_utils from invenio.errorlib import register_exception from invenio.config import \ CFG_SITE_LANG, \ CFG_PATH_PHP, \ CFG_SITE_URL, \ CFG_BIBFORMAT_HIDDEN_TAGS, \ - CFG_SITE_RECORD + CFG_SITE_RECORD, \ + CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS from invenio.bibformat_config import \ - CFG_BIBFORMAT_USE_OLD_BIBFORMAT, \ - CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT + CFG_BIBFORMAT_USE_OLD_BIBFORMAT from invenio.access_control_engine import acc_authorize_action import getopt import sys # Functions to format a single record ## def format_record(recID, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None, xml_record=None, user_info=None, on_the_fly=False): """ Format a record in given output format. Return a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. The record to be formatted can be specified with its ID (with 'recID' parameter) or given as XML representation (with 'xml_record' parameter). If 'xml_record' is specified 'recID' is ignored (but should still be given for reference. A dummy recid 0 or -1 could be used). 'user_info' allows to grant access to some functionalities on a page depending on the user's priviledges. The 'user_info' object makes sense only in the case of on-the-fly formatting. 'user_info' is the same object as the one returned by 'webuser.collect_user_info(req)' @param recID: the ID of record to format. 
@type recID: int @param of: an output format code (or short identifier for the output format) @type of: string @param ln: the language to use to format the record @type ln: string @param verbose: the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @type verbose: int @param search_pattern: list of strings representing the user request in web interface @type search_pattern: list(string) @param xml_record: an xml string represention of the record to format @type xml_record: string or None @param user_info: the information of the user who will view the formatted page (if applicable) @param on_the_fly: if False, try to return an already preformatted version of the record in the database @type on_the_fly: boolean @return: formatted record @rtype: string """ from invenio.search_engine import record_exists if search_pattern is None: search_pattern = [] out = "" if verbose == 9: out += """\n<span class="quicknote"> Formatting record %i with output format %s. </span>""" % (recID, of) ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### if CFG_BIBFORMAT_USE_OLD_BIBFORMAT and CFG_PATH_PHP: return bibformat_engine.call_old_bibformat(recID, of=of, on_the_fly=on_the_fly) ############################# END ################################## if not on_the_fly and \ (ln == CFG_SITE_LANG or \ of.lower() == 'xm' or \ CFG_BIBFORMAT_USE_OLD_BIBFORMAT or \ - (CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT == False and of.lower() == 'hb')) and \ + (of.lower() in CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS)) and \ record_exists(recID) != -1: - # Try to fetch preformatted record Only possible for records + # Try to fetch preformatted record. Only possible for records # formatted in CFG_SITE_LANG language (other are never # stored), or of='xm' which does not depend on language. 
- # Also, when formatting in HB, and when - # CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT is set to False, - # ignore other languages and fetch the preformatted output. - # Also, do not fetch from DB when record has been deleted: we - # want to return an "empty" record in that case + # Exceptions are made for output formats defined in + # CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS, which are + # always served from the same cache for any language. Also, + # do not fetch from DB when record has been deleted: we want + # to return an "empty" record in that case res = bibformat_dblayer.get_preformatted_record(recID, of) if res is not None: # record 'recID' is formatted in 'of', so return it if verbose == 9: last_updated = bibformat_dblayer.get_preformatted_record_date(recID, of) out += """\n<br/><span class="quicknote"> Found preformatted output for record %i (cache updated on %s). </span><br/>""" % (recID, last_updated) if of.lower() == 'xm': res = filter_hidden_fields(res, user_info) out += res return out else: if verbose == 9: out += """\n<br/><span class="quicknote"> No preformatted output found for record %s. </span>"""% recID # Live formatting of records in all other cases if verbose == 9: out += """\n<br/><span class="quicknote"> Formatting record %i on-the-fly. </span>""" % recID try: out += bibformat_engine.format_record(recID=recID, of=of, ln=ln, verbose=verbose, search_pattern=search_pattern, xml_record=xml_record, user_info=user_info) if of.lower() == 'xm': out = filter_hidden_fields(out, user_info) return out except Exception, e: register_exception(prefix="An error occured while formatting record %i in %s" % \ (recID, of), alert_admin=True) #Failsafe execution mode import invenio.template websearch_templates = invenio.template.load('websearch') if verbose == 9: out += """\n<br/><span class="quicknote"> An error occured while formatting record %i. 
(%s) </span>""" % (recID, str(e)) if of.lower() == 'hd': if verbose == 9: out += """\n<br/><span class="quicknote"> Formatting record %i with websearch_templates.tmpl_print_record_detailed. </span><br/>""" % recID return out + websearch_templates.tmpl_print_record_detailed( ln = ln, recID = recID, ) if verbose == 9: out += """\n<br/><span class="quicknote"> Formatting record %i with websearch_templates.tmpl_print_record_brief. </span><br/>""" % recID return out + websearch_templates.tmpl_print_record_brief(ln = ln, recID = recID, ) def record_get_xml(recID, format='xm', decompress=zlib.decompress): """ Returns an XML string of the record given by recID. The function builds the XML directly from the database, without using the standard formatting process. 'format' allows to define the flavour of XML: - 'xm' for standard XML - 'marcxml' for MARC XML - 'oai_dc' for OAI Dublin Core - 'xd' for XML Dublin Core If record does not exist, returns empty string. @param recID: the id of the record to retrieve @param format: the format to use @param decompress: the library to use to decompress cache from DB @return: the xml string of the record """ return bibformat_utils.record_get_xml(recID=recID, format=format, decompress=decompress) # Helper functions to do complex formatting of multiple records # # You should not modify format_records when adding a complex # formatting of multiple records, but add a create_* method # that relies on format_records to do the formatting. ## def format_records(recIDs, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None, xml_records=None, user_info=None, record_prefix=None, record_separator=None, record_suffix=None, prologue="", epilogue="", req=None, on_the_fly=False): """ Format records given by a list of record IDs or a list of records as xml. Adds a prefix before each record, a suffix after each record, plus a separator between records. Also add optional prologue and epilogue to the complete formatted list. 
You can either specify a list of record IDs to format, or a list of xml records, but not both (if both are specified recIDs is ignored). 'record_separator' is a function that returns a string as separator between records. The function must take an integer as unique parameter, which is the index in recIDs (or xml_records) of the record that has just been formatted. For example separator(i) must return the separator between recID[i] and recID[i+1]. Alternatively separator can be a single string, which will be used to separate all formatted records. The same applies to 'record_prefix' and 'record_suffix'. 'req' is an optional parameter on which the result of the function are printed lively (prints records after records) if it is given. Note that you should set 'req' content-type by yourself, and send http header before calling this function as it will not do it. This function takes the same parameters as 'format_record' except for: @param recIDs: a list of record IDs @type recIDs: list(int) @param of: an output format code (or short identifier for the output format) @type of: string @param ln: the language to use to format the record @type ln: string @param verbose: the level of verbosity from 0 to 9 (0: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @type verbose: int @param search_pattern: list of strings representing the user request in web interface @type search_pattern: list(string) @param user_info: the information of the user who will view the formatted page (if applicable) @param xml_records: a list of xml string representions of the records to format @type xml_records: list(string) @param record_prefix: a string printed before B{each} formatted records (n times) @type record_prefix: string @param record_suffix: a string printed after B{each} formatted records (n times) @type record_suffix: string @param prologue: a string printed at the beginning of the complete formatted 
records (1x) @type prologue: string @param epilogue: a string printed at the end of the complete formatted output (1x) @type epilogue: string @param record_separator: either a string or a function that returns string to join formatted records @param record_separator: string or function @param req: an optional request object where to print records @param on_the_fly: if False, try to return an already preformatted version of the record in the database @type on_the_fly: boolean @rtype: string """ if req is not None: req.write(prologue) formatted_records = '' #Fill one of the lists with Nones if xml_records is not None: recIDs = map(lambda x:None, xml_records) else: xml_records = map(lambda x:None, recIDs) total_rec = len(recIDs) last_iteration = False for i in range(total_rec): if i == total_rec - 1: last_iteration = True #Print prefix if record_prefix is not None: if isinstance(record_prefix, str): formatted_records += record_prefix if req is not None: req.write(record_prefix) else: string_prefix = record_prefix(i) formatted_records += string_prefix if req is not None: req.write(string_prefix) #Print formatted record formatted_record = format_record(recIDs[i], of, ln, verbose, \ search_pattern, xml_records[i],\ user_info, on_the_fly) formatted_records += formatted_record if req is not None: req.write(formatted_record) #Print suffix if record_suffix is not None: if isinstance(record_suffix, str): formatted_records += record_suffix if req is not None: req.write(record_suffix) else: string_suffix = record_suffix(i) formatted_records += string_suffix if req is not None: req.write(string_suffix) #Print separator if needed if record_separator is not None and not last_iteration: if isinstance(record_separator, str): formatted_records += record_separator if req is not None: req.write(record_separator) else: string_separator = record_separator(i) formatted_records += string_separator if req is not None: req.write(string_separator) if req is not None: req.write(epilogue) 
return prologue + formatted_records + epilogue def create_excel(recIDs, req=None, ln=CFG_SITE_LANG, ot=None, ot_sep="; "): """ Returns an Excel readable format containing the given recIDs. If 'req' is given, also prints the output in 'req' while individual records are being formatted. This method shows how to create a custom formatting of multiple records. The excel format is a basic HTML table that most spreadsheets applications can parse. If 'ot' is given, the BibFormat engine is overridden and the output is produced on the basis of the fields that 'ot' defines (see search_engine.perform_request_search(..) 'ot' param). @param req: the request object @param recIDs: a list of record IDs @param ln: language @param ot: a list of fields that should be included in the excel output as columns(see perform_request_search 'ot' param) @param ot_sep: a separator used to separate values for the same record, in the same columns, if any @return: a string in Excel format """ # Prepare the column headers to display in the Excel file column_headers_list = ['Title', 'Authors', 'Addresses', 'Affiliation', 'Date', 'Publisher', 'Place', 'Abstract', 'Keywords', 'Notes'] # Prepare Content column_headers = '</b></td><td style="border-color:black; border-style:solid; border-width:thin; background-color:black;color:white"><b>'.join(column_headers_list) + '' column_headers = '<table style="border-collapse: collapse;">\n'+ '<td style="border-color:black; border-style:solid; border-width:thin; background-color:black;color:white"><b>' + column_headers + '</b></td>' footer = '</table>' # Apply content_type and print column headers if req is not None: req.content_type = get_output_format_content_type('excel') req.headers_out["Content-Disposition"] = "inline; filename=%s" % 'results.xls' req.send_http_header() if ot is not None and len(ot) > 0: # Skip BibFormat engine, produce our own output based on # specified fields. Each field will be a column of the # output. 
If a field has multiple values, then they are joined # into the same cell. out = "<table>" if req: req.write("<table>") for recID in recIDs: row = '<tr>' row += '<td><a href="%(CFG_SITE_URL)s/%(CFG_SITE_RECORD)s/%(recID)i">%(recID)i</a></td>' % \ {'recID': recID, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'CFG_SITE_URL': CFG_SITE_URL} for field in ot: row += '<td>' + \ ot_sep.join(bibformat_utils.get_all_fieldvalues(recID, field)) + \ '</td>' row += '</tr>' out += row if req: req.write(row) out += '</table>' if req: req.write('</table>') return out #Format the records excel_formatted_records = format_records(recIDs, 'excel', ln=CFG_SITE_LANG, record_separator='\n', prologue = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><table>', epilogue = footer, req=req) return excel_formatted_records # Utility functions ## def filter_hidden_fields(recxml, user_info=None, filter_tags=CFG_BIBFORMAT_HIDDEN_TAGS, force_filtering=False): """ Filter out tags specified by filter_tags from MARCXML. If the user is allowed to run bibedit, then filter nothing, unless force_filtering is set to True. @param recxml: marcxml presentation of the record @param user_info: user information; if None, then assume invoked via CLI with all rights @param filter_tags: list of MARC tags to be filtered @param force_filtering: do we force filtering regardless of user rights? @return: recxml without the hidden fields """ if force_filtering: pass else: if user_info is None: #by default return recxml else: if (acc_authorize_action(user_info, 'runbibedit')[0] == 0): #no need to filter return recxml #filter.. 
out = "" omit = False for line in recxml.splitlines(True): #check if this block needs to be omitted for htag in filter_tags: if line.count('datafield tag="'+str(htag)+'"'): omit = True if not omit: out += line if omit and line.count('</datafield>'): omit = False return out def get_output_format_content_type(of): """ Returns the content type (for example 'text/html' or 'application/ms-excel') \ of the given output format. @param of: the code of output format for which we want to get the content type @return: the content-type to use for this output format """ content_type = bibformat_dblayer.get_output_format_content_type(of) if content_type == '': content_type = 'text/html' return content_type def usage(exitcode=1, msg=""): """ Prints usage info. @param exitcode: exit code to use (eg. 1 for error, 0 for okay) @param msg: message to print @return: exit the process """ if msg: sys.stderr.write("Error: %s.\n" % msg) print """BibFormat: outputs the result of the formatting of a record. Usage: bibformat required [options] Examples: $ bibformat -i 10 -o HB $ bibformat -i 10,11,13 -o HB $ bibformat -i 10:13 $ bibformat -i 10 -o HB -v 9 Required: -i, --id=ID[ID2,ID3:ID5] ID (or range of IDs) of the record(s) to be formatted. Options: -o, --output=CODE short code of the output format used for formatting (default HB). -l, --lang=LN language used for formatting. -y, --onthefly on-the-fly formatting, avoiding caches created by BibReformat. 
General options: -h, --help print this help and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 0) -V --version print the script version """ sys.exit(exitcode) def main(): """ Main entry point for biformat via command line @return: formatted record(s) as specified by options, or help/errors """ options = {} # will hold command-line options options["verbose"] = 0 options["onthefly"] = False options["lang"] = CFG_SITE_LANG options["output"] = "HB" options["recID"] = None try: opts, args = getopt.getopt(sys.argv[1:], "hVv:yl:i:o:", ["help", "version", "verbose=", "onthefly", "lang=", "id=", "output="]) except getopt.GetoptError, err: usage(1, err) pass try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in ["-y", "--onthefly"]: options["onthefly"] = True elif opt[0] in ["-l", "--lang"]: options["lang"] = opt[1] elif opt[0] in ["-i", "--id"]: recIDs = [] for recID in opt[1].split(','): if ":" in recID: start = int(recID.split(':')[0]) end = int(recID.split(':')[1]) recIDs.extend(range(start, end)) else: recIDs.append(int(recID)) options["recID"] = recIDs elif opt[0] in ["-o", "--output"]: options["output"] = opt[1] if options["recID"] == None: usage(1, "-i argument is needed") except StandardError, e: usage(e) print format_records(recIDs=options["recID"], of=options["output"], ln=options["lang"], verbose=options["verbose"], on_the_fly=options["onthefly"]) return if __name__ == "__main__": main() diff --git a/modules/bibformat/lib/bibformat_config.py b/modules/bibformat/lib/bibformat_config.py index 85d60e5b6..fd3b360ab 100644 --- a/modules/bibformat/lib/bibformat_config.py +++ b/modules/bibformat/lib/bibformat_config.py @@ -1,75 +1,62 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0301 """BibFormat configuration parameters.""" __revision__ = "$Id$" import os from invenio.config import CFG_ETCDIR, CFG_PYLIBDIR # True if old php format written in EL must be used by Invenio. # False if new python format must be used. If set to 'False' but # new format cannot be found, old format will be used. CFG_BIBFORMAT_USE_OLD_BIBFORMAT = False -# Enable internationalization of brief format (HB). When set to 'True', -# BibFormat will try to display each record of the search results list -# in the language chosen by the user. This currently means that the -# formatting of each record will be done on-the-fly for each language -# different from CFG_SITE_LANG, as bibreformat precreates formatted -# output in this language only. If set to 'False', the cache created by -# bibreformat will be used independently of the language chosen by the -# user. You might want to set this setting to True if your users comes -# from various language zones and if you provide language-dependant -# content in the brief format. Also consider the impact on the -# performance of your server to have on-the-fly formatting enabled. 
-CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT = True - # Paths to main formats directories CFG_BIBFORMAT_TEMPLATES_PATH = "%s%sbibformat%sformat_templates" % (CFG_ETCDIR, os.sep, os.sep) CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = "invenio.bibformat_elements" CFG_BIBFORMAT_ELEMENTS_PATH = "%s%sinvenio%sbibformat_elements" % (CFG_PYLIBDIR, os.sep, os.sep) CFG_BIBFORMAT_OUTPUTS_PATH = "%s%sbibformat%soutput_formats" % (CFG_ETCDIR, os.sep, os.sep) # File extensions of formats CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION = "bft" CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION = "bfo" # Exceptions: errors class InvenioBibFormatError(Exception): """A generic error for BibFormat.""" def __init__(self, message): """Initialisation.""" self.message = message def __str__(self): """String representation.""" return repr(self.message) # Exceptions: warnings class InvenioBibFormatWarning(Exception): """A generic warning for BibFormat.""" def __init__(self, message): """Initialisation.""" self.message = message def __str__(self): """String representation.""" return repr(self.message) diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py index c2aad5f83..48889cd62 100644 --- a/modules/miscutil/lib/inveniocfg.py +++ b/modules/miscutil/lib/inveniocfg.py @@ -1,1342 +1,1343 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio configuration and administration CLI tool. Usage: inveniocfg [options] General options: -h, --help print this help -V, --version print version number Options to finish your installation: --create-apache-conf create Apache configuration files --create-tables create DB tables for Invenio --load-webstat-conf load the WebStat configuration --drop-tables drop DB tables of Invenio --check-openoffice check for correctly set up of openoffice temporary directory Options to set up and test a demo site: --create-demo-site create demo site --load-demo-records load demo records --remove-demo-records remove demo records, keeping demo site --drop-demo-site drop demo site configurations too --run-unit-tests run unit test suite (needs demo site) --run-regression-tests run regression test suite (needs demo site) --run-web-tests run web tests in a browser (needs demo site, Firefox, Selenium IDE) Options to update config files in situ: --update-all perform all the update options --update-config-py update config.py file from invenio.conf file --update-dbquery-py update dbquery.py with DB credentials from invenio.conf --update-dbexec update dbexec with DB credentials from invenio.conf --update-bibconvert-tpl update bibconvert templates with CFG_SITE_URL from invenio.conf --update-web-tests update web test cases with CFG_SITE_URL from invenio.conf Options to update DB tables: --reset-all perform all the reset options --reset-sitename reset tables to take account of new CFG_SITE_NAME* --reset-siteadminemail reset tables to take account of new CFG_SITE_ADMIN_EMAIL --reset-fieldnames reset tables to take account of new I18N names from PO files --reset-recstruct-cache reset record structure cache according to CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE Options to help the work: 
--list print names and values of all options from conf files --get <some-opt> get value of a given option from conf files --conf-dir </some/path> path to directory where invenio*.conf files are [optional] --detect-system-details print system details such as Apache/Python/MySQL versions """ __revision__ = "$Id$" from ConfigParser import ConfigParser import os import re import shutil import socket import sys def print_usage(): """Print help.""" print __doc__ def print_version(): """Print version information.""" print __revision__ def convert_conf_option(option_name, option_value): """ Convert conf option into Python config.py line, converting values to ints or strings as appropriate. """ ## 1) convert option name to uppercase: option_name = option_name.upper() ## 2) convert option value to int or string: if option_name in ['CFG_BIBUPLOAD_REFERENCE_TAG', 'CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS',]: # some options are supposed be string even when they look like # numeric option_value = '"' + option_value + '"' else: try: option_value = int(option_value) except ValueError: option_value = '"' + option_value + '"' ## 3a) special cases: chars regexps if option_name in ['CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS', 'CFG_BIBINDEX_CHARS_PUNCTUATION']: option_value = 'r"[' + option_value[1:-1] + ']"' ## 3abis) special cases: real regexps if option_name in ['CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES']: option_value = 'r"' + option_value[1:-1] + '"' ## 3b) special cases: True, False, None if option_value in ['"True"', '"False"', '"None"']: option_value = option_value[1:-1] ## 3c) special cases: dicts if option_name in ['CFG_WEBSEARCH_FIELDS_CONVERT', 'CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS', 'CFG_SITE_EMERGENCY_EMAIL_ADDRESSES', 'CFG_BIBMATCH_FUZZY_WORDLIMITS', 'CFG_BIBMATCH_QUERY_TEMPLATES', 'CFG_WEBSEARCH_SYNONYM_KBRS', 'CFG_BIBINDEX_SYNONYM_KBRS', 
'CFG_WEBCOMMENT_EMAIL_REPLIES_TO', 'CFG_WEBCOMMENT_RESTRICTION_DATAFIELD', 'CFG_WEBCOMMENT_ROUND_DATAFIELD', 'CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS', 'CFG_BIBSCHED_NODE_TASKS', 'CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE', 'CFG_OAI_METADATA_FORMATS']: option_value = option_value[1:-1] ## 3cbis) very special cases: dicts with backward compatible string if option_name in ['CFG_BIBINDEX_SPLASH_PAGES']: if option_value.startswith('"{') and option_value.endswith('}"'): option_value = option_value[1:-1] else: option_value = """{%s: ".*"}""" % option_value ## 3d) special cases: comma-separated lists if option_name in ['CFG_SITE_LANGS', 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', 'CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS', 'CFG_BIBSCHED_GC_TASKS_TO_REMOVE', 'CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE', 'CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS', 'CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS', 'CFG_BIBUPLOAD_DELETE_FORMATS', 'CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES', 'CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST', 'CFG_WEBSEARCH_RSS_I18N_COLLECTIONS', 'CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY', 'CFG_BATCHUPLOADER_WEB_ROBOT_AGENT', 'CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY', 'CFG_PLOTEXTRACTOR_DISALLOWED_TEX', 'CFG_OAI_FRIENDS', 'CFG_WEBSTYLE_REVERSE_PROXY_IPS', - 'CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS']: + 'CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS', + 'CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS']: out = "[" for elem in option_value[1:-1].split(","): if elem: if option_name in ['CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES']: # 3d1) integer values out += "%i, " % int(elem) else: # 3d2) string values out += "'%s', " % elem out += "]" option_value = out ## 3e) special cases: multiline if option_name == 'CFG_OAI_IDENTIFY_DESCRIPTION': # make triple quotes option_value = '""' + option_value + '""' ## 3f) ignore some options: if option_name.startswith('CFG_SITE_NAME_INTL'): # treated elsewhere return ## 3g) special 
cases: float if option_name in ['CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY', 'CFG_BIBMATCH_LOCAL_SLEEPTIME', 'CFG_BIBMATCH_REMOTE_SLEEPTIME', 'CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_BCTKD_RA', 'CFG_BIBAUTHORID_PERSONID_MIN_P_FROM_NEW_RA', 'CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH', 'CFG_BIBAUTHORID_PERSONID_MAX_COMP_LIST_MIN_TRSH_P_N', 'CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT', 'CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT']: option_value = float(option_value[1:-1]) ## 3h) special cases: bibmatch validation list if option_name in ['CFG_BIBMATCH_MATCH_VALIDATION_RULESETS']: option_value = option_value[1:-1] ## 4) finally, return output line: return '%s = %s' % (option_name, option_value) def cli_cmd_update_config_py(conf): """ Update new config.py from conf options, keeping previous config.py in a backup copy. """ print ">>> Going to update config.py..." ## location where config.py is: configpyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'config.py' ## backup current config.py file: if os.path.exists(configpyfile): shutil.copy(configpyfile, configpyfile + '.OLD') ## here we go: fdesc = open(configpyfile, 'w') ## generate preamble: fdesc.write("# -*- coding: utf-8 -*-\n") fdesc.write("# DO NOT EDIT THIS FILE! 
IT WAS AUTOMATICALLY GENERATED\n") fdesc.write("# FROM INVENIO.CONF BY EXECUTING:\n") fdesc.write("# " + " ".join(sys.argv) + "\n") ## special treatment for CFG_SITE_NAME_INTL options: fdesc.write("CFG_SITE_NAME_INTL = {}\n") for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): fdesc.write("CFG_SITE_NAME_INTL['%s'] = \"%s\"\n" % (lang, conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang))) ## special treatment for CFG_SITE_SECURE_URL that may be empty, in ## which case it should be put equal to CFG_SITE_URL: if not conf.get("Invenio", "CFG_SITE_SECURE_URL"): conf.set("Invenio", "CFG_SITE_SECURE_URL", conf.get("Invenio", "CFG_SITE_URL")) ## process all the options normally: sections = conf.sections() sections.sort() for section in sections: options = conf.options(section) options.sort() for option in options: if not option.startswith('CFG_DATABASE_'): # put all options except for db credentials into config.py line_out = convert_conf_option(option, conf.get(section, option)) if line_out: fdesc.write(line_out + "\n") ## FIXME: special treatment for experimental variables ## CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES and CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE ## (not offering them in invenio.conf since they will be refactored) fdesc.write("CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE = 0\n") fdesc.write("CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = [0, 1,]\n") ## generate postamble: fdesc.write("") fdesc.write("# END OF GENERATED FILE") ## we are done: fdesc.close() print "You may want to restart Apache now." print ">>> config.py updated successfully." def cli_cmd_update_dbquery_py(conf): """ Update lib/dbquery.py file with DB parameters read from conf file. Note: this edits dbquery.py in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbquery.py..." 
## location where dbquery.py is: dbquerypyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'dbquery.py' ## backup current dbquery.py file: if os.path.exists(dbquerypyfile): shutil.copy(dbquerypyfile, dbquerypyfile + '.OLD') ## replace db parameters: out = '' for line in open(dbquerypyfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get('Invenio', dbparam)) else: out += line fdesc = open(dbquerypyfile, 'w') fdesc.write(out) fdesc.close() print "You may want to restart Apache now." print ">>> dbquery.py updated successfully." def cli_cmd_update_dbexec(conf): """ Update bin/dbexec file with DB parameters read from conf file. Note: this edits dbexec in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbexec..." ## location where dbexec is: dbexecfile = conf.get("Invenio", "CFG_BINDIR") + \ os.sep + 'dbexec' ## backup current dbexec file: if os.path.exists(dbexecfile): shutil.copy(dbexecfile, dbexecfile + '.OLD') ## replace db parameters via sed: out = '' for line in open(dbexecfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get("Invenio", dbparam)) else: out += line fdesc = open(dbexecfile, 'w') fdesc.write(out) fdesc.close() print ">>> dbexec updated successfully." def cli_cmd_update_bibconvert_tpl(conf): """ Update bibconvert/config/*.tpl files looking for 856 http://.../CFG_SITE_RECORD lines, replacing URL with CFG_SITE_URL taken from conf file. Note: this edits tpl files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update bibconvert templates..." 
## location where bibconvert/config/*.tpl are: tpldir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'bibconvert' + os.sep + 'config' ## find all *.tpl files: for tplfilename in os.listdir(tpldir): if tplfilename.endswith(".tpl"): ## change tpl file: tplfile = tpldir + os.sep + tplfilename shutil.copy(tplfile, tplfile + '.OLD') out = '' for line in open(tplfile, 'r').readlines(): match = re.search(r'^(.*)http://.*?/%s/(.*)$' % conf.get("Invenio", 'CFG_SITE_RECORD'), line) if match: out += "%s%s/%s/%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), conf.get("Invenio", 'CFG_SITE_RECORD'), match.group(2)) else: out += line fdesc = open(tplfile, 'w') fdesc.write(out) fdesc.close() print ">>> bibconvert templates updated successfully." def cli_cmd_update_web_tests(conf): """ Update web test cases lib/webtest/test_*.html looking for <td>http://.+?[</] strings and replacing them with CFG_SITE_URL taken from conf file. Note: this edits test files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update web tests..." ## location where test_*.html files are: testdir = conf.get("Invenio", 'CFG_PREFIX') + os.sep + \ 'lib' + os.sep + 'webtest' + os.sep + 'invenio' ## find all test_*.html files: for testfilename in os.listdir(testdir): if testfilename.startswith("test_") and \ testfilename.endswith(".html"): ## change test file: testfile = testdir + os.sep + testfilename shutil.copy(testfile, testfile + '.OLD') out = '' for line in open(testfile, 'r').readlines(): match = re.search(r'^(.*<td>)http://.+?([</].*)$', line) if match: out += "%s%s%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), match.group(2)) else: match = re.search(r'^(.*<td>)/opt/invenio(.*)$', line) if match: out += "%s%s%s\n" % (match.group(1), conf.get("Invenio", 'CFG_PREFIX'), match.group(2)) else: out += line fdesc = open(testfile, 'w') fdesc.write(out) fdesc.close() print ">>> web tests updated successfully." 
def cli_cmd_reset_sitename(conf): """ Reset collection-related tables with new CFG_SITE_NAME and CFG_SITE_NAME_INTL* read from conf files. """ print ">>> Going to reset CFG_SITE_NAME and CFG_SITE_NAME_INTL..." from invenio.dbquery import run_sql, IntegrityError # reset CFG_SITE_NAME: sitename = conf.get("Invenio", "CFG_SITE_NAME") try: run_sql("""INSERT INTO collection (id, name, dbquery, reclist) VALUES (1,%s,NULL,NULL)""", (sitename,)) except IntegrityError: run_sql("""UPDATE collection SET name=%s WHERE id=1""", (sitename,)) # reset CFG_SITE_NAME_INTL: for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): sitename_lang = conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang) try: run_sql("""INSERT INTO collectionname (id_collection, ln, type, value) VALUES (%s,%s,%s,%s)""", (1, lang, 'ln', sitename_lang)) except IntegrityError: run_sql("""UPDATE collectionname SET value=%s WHERE ln=%s AND id_collection=1 AND type='ln'""", (sitename_lang, lang)) print "You may want to restart Apache now." print ">>> CFG_SITE_NAME and CFG_SITE_NAME_INTL* reset successfully." def cli_cmd_reset_recstruct_cache(conf): """If CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE is changed, this function will adapt the database to either store or not store the recstruct format.""" from invenio.intbitset import intbitset from invenio.dbquery import run_sql, serialize_via_marshal from invenio.search_engine import get_record from invenio.bibsched import server_pid, pidfile enable_recstruct_cache = conf.get("Invenio", "CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE") enable_recstruct_cache = enable_recstruct_cache in ('True', '1') pid = server_pid(ping_the_process=False) if pid: print >> sys.stderr, "ERROR: bibsched seems to run with pid %d, according to %s." % (pid, pidfile) print >> sys.stderr, " Please stop bibsched before running this procedure." sys.exit(1) if enable_recstruct_cache: print ">>> Searching records which need recstruct cache resetting; this may take a while..." 
all_recids = intbitset(run_sql("SELECT id FROM bibrec")) good_recids = intbitset(run_sql("SELECT bibrec.id FROM bibrec JOIN bibfmt ON bibrec.id = bibfmt.id_bibrec WHERE format='recstruct' AND modification_date < last_updated")) recids = all_recids - good_recids print ">>> Generating recstruct cache..." tot = len(recids) count = 0 for recid in recids: value = serialize_via_marshal(get_record(recid)) run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s AND format='recstruct'", (recid, )) run_sql("INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, 'recstruct', NOW(), %s)", (recid, value)) count += 1 if count % 1000 == 0: print " ... done records %s/%s" % (count, tot) if count % 1000 != 0: print " ... done records %s/%s" % (count, tot) print ">>> recstruct cache generated successfully." else: print ">>> Cleaning recstruct cache..." run_sql("DELETE FROM bibfmt WHERE format='recstruct'") def cli_cmd_reset_siteadminemail(conf): """ Reset user-related tables with new CFG_SITE_ADMIN_EMAIL read from conf files. """ print ">>> Going to reset CFG_SITE_ADMIN_EMAIL..." from invenio.dbquery import run_sql siteadminemail = conf.get("Invenio", "CFG_SITE_ADMIN_EMAIL") run_sql("DELETE FROM user WHERE id=1") run_sql("""INSERT INTO user (id, email, password, note, nickname) VALUES (1, %s, AES_ENCRYPT(email, ''), 1, 'admin')""", (siteadminemail,)) print "You may want to restart Apache now." print ">>> CFG_SITE_ADMIN_EMAIL reset successfully." def cli_cmd_reset_fieldnames(conf): """ Reset I18N field names such as author, title, etc and other I18N ranking method names such as word similarity. Their translations are taken from the PO files. """ print ">>> Going to reset I18N field names..." 
from invenio.messages import gettext_set_language, language_list_long from invenio.dbquery import run_sql, IntegrityError ## get field id and name list: field_id_name_list = run_sql("SELECT id, name FROM field") ## get rankmethod id and name list: rankmethod_id_name_list = run_sql("SELECT id, name FROM rnkMETHOD") ## update names for every language: for lang, dummy in language_list_long(): _ = gettext_set_language(lang) ## this list is put here in order for PO system to pick names ## suitable for translation field_name_names = {"any field": _("any field"), "title": _("title"), "author": _("author"), "abstract": _("abstract"), "keyword": _("keyword"), "report number": _("report number"), "subject": _("subject"), "reference": _("reference"), "fulltext": _("fulltext"), "collection": _("collection"), "division": _("division"), "year": _("year"), "journal": _("journal"), "experiment": _("experiment"), "record ID": _("record ID")} ## update I18N names for every language: for (field_id, field_name) in field_id_name_list: if field_name_names.has_key(field_name): try: run_sql("""INSERT INTO fieldname (id_field,ln,type,value) VALUES (%s,%s,%s,%s)""", (field_id, lang, 'ln', field_name_names[field_name])) except IntegrityError: run_sql("""UPDATE fieldname SET value=%s WHERE id_field=%s AND ln=%s AND type=%s""", (field_name_names[field_name], field_id, lang, 'ln',)) ## ditto for rank methods: rankmethod_name_names = {"wrd": _("word similarity"), "demo_jif": _("journal impact factor"), "citation": _("times cited"), "citerank_citation_t": _("time-decay cite count"), "citerank_pagerank_c": _("all-time-best cite rank"), "citerank_pagerank_t": _("time-decay cite rank"),} for (rankmethod_id, rankmethod_name) in rankmethod_id_name_list: if rankmethod_name_names.has_key(rankmethod_name): try: run_sql("""INSERT INTO rnkMETHODNAME (id_rnkMETHOD,ln,type,value) VALUES (%s,%s,%s,%s)""", (rankmethod_id, lang, 'ln', rankmethod_name_names[rankmethod_name])) except IntegrityError: 
run_sql("""UPDATE rnkMETHODNAME SET value=%s WHERE id_rnkMETHOD=%s AND ln=%s AND type=%s""", (rankmethod_name_names[rankmethod_name], rankmethod_id, lang, 'ln',)) print ">>> I18N field names reset successfully." def cli_check_openoffice(conf): """ If OpenOffice.org integration is enabled, checks whether the system is properly configured. """ from invenio.bibtask import check_running_process_user from invenio.websubmit_file_converter import can_unoconv, get_file_converter_logger logger = get_file_converter_logger() for handler in logger.handlers: logger.removeHandler(handler) check_running_process_user() print ">>> Checking if Libre/OpenOffice.org is correctly integrated...", sys.stdout.flush() if can_unoconv(True): print "ok" else: sys.exit(1) def test_db_connection(): """ Test DB connection, and if fails, advise user how to set it up. Useful to be called during table creation. """ print "Testing DB connection...", from invenio.textutils import wrap_text_in_a_box from invenio.dbquery import run_sql, Error ## first, test connection to the DB server: try: run_sql("SHOW TABLES") except Error, err: from invenio.dbquery import CFG_DATABASE_HOST, CFG_DATABASE_PORT, \ CFG_DATABASE_NAME, CFG_DATABASE_USER, CFG_DATABASE_PASS print wrap_text_in_a_box("""\ DATABASE CONNECTIVITY ERROR %(errno)d: %(errmsg)s.\n Perhaps you need to set up database and connection rights? If yes, then please login as MySQL admin user and run the following commands now: $ mysql -h %(dbhost)s -P %(dbport)s -u root -p mysql mysql> CREATE DATABASE %(dbname)s DEFAULT CHARACTER SET utf8; mysql> GRANT ALL PRIVILEGES ON %(dbname)s.* TO %(dbuser)s@%(webhost)s IDENTIFIED BY '%(dbpass)s'; mysql> QUIT The values printed above were detected from your configuration. If they are not right, then please edit your invenio-local.conf file and rerun 'inveniocfg --update-all' first. 
If the problem is of different nature, then please inspect the above error message and fix the problem before continuing.""" % \ {'errno': err.args[0], 'errmsg': err.args[1], 'dbname': CFG_DATABASE_NAME, 'dbhost': CFG_DATABASE_HOST, 'dbport': CFG_DATABASE_PORT, 'dbuser': CFG_DATABASE_USER, 'dbpass': CFG_DATABASE_PASS, 'webhost': CFG_DATABASE_HOST == 'localhost' and 'localhost' or os.popen('hostname -f', 'r').read().strip(), }) sys.exit(1) print "ok" ## second, test insert/select of a Unicode string to detect ## possible Python/MySQL/MySQLdb mis-setup: print "Testing Python/MySQL/MySQLdb UTF-8 chain...", try: beta_in_utf8 = "β" # Greek beta in UTF-8 is 0xCEB2 run_sql("CREATE TEMPORARY TABLE test__invenio__utf8 (x char(1), y varbinary(2)) DEFAULT CHARACTER SET utf8") run_sql("INSERT INTO test__invenio__utf8 (x, y) VALUES (%s, %s)", (beta_in_utf8, beta_in_utf8)) res = run_sql("SELECT x,y,HEX(x),HEX(y),LENGTH(x),LENGTH(y),CHAR_LENGTH(x),CHAR_LENGTH(y) FROM test__invenio__utf8") assert res[0] == ('\xce\xb2', '\xce\xb2', 'CEB2', 'CEB2', 2L, 2L, 1L, 2L) run_sql("DROP TEMPORARY TABLE test__invenio__utf8") except Exception, err: print wrap_text_in_a_box("""\ DATABASE RELATED ERROR %s\n A problem was detected with the UTF-8 treatment in the chain between the Python application, the MySQLdb connector, and the MySQL database. You may perhaps have installed older versions of some prerequisite packages?\n Please check the INSTALL file and please fix this problem before continuing.""" % err) sys.exit(1) print "ok" def cli_cmd_create_tables(conf): """Create and fill Invenio DB tables. Useful for the installation process.""" print ">>> Going to create and fill tables..." 
from invenio.config import CFG_PREFIX test_db_connection() for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabcreate.sql" % (CFG_PREFIX, CFG_PREFIX), "%s/bin/dbexec < %s/lib/sql/invenio/tabfill.sql" % (CFG_PREFIX, CFG_PREFIX)]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) cli_cmd_reset_sitename(conf) cli_cmd_reset_siteadminemail(conf) cli_cmd_reset_fieldnames(conf) for cmd in ["%s/bin/webaccessadmin -u admin -c -a" % CFG_PREFIX]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Tables created and filled successfully." def cli_cmd_load_webstat_conf(conf): print ">>> Going to load WebStat config..." from invenio.config import CFG_PREFIX cmd = "%s/bin/webstatadmin --load-config" % CFG_PREFIX if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> WebStat config load successfully." def cli_cmd_drop_tables(conf): """Drop Invenio DB tables. Useful for the uninstallation process.""" print ">>> Going to drop tables..." from invenio.config import CFG_PREFIX from invenio.textutils import wrap_text_in_a_box, wait_for_user from invenio.webstat import destroy_customevents wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables!""")) msg = destroy_customevents() if msg: print msg cmd = "%s/bin/dbexec < %s/lib/sql/invenio/tabdrop.sql" % (CFG_PREFIX, CFG_PREFIX) if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Tables dropped successfully." def cli_cmd_create_demo_site(conf): """Create demo site. Useful for testing purposes.""" print ">>> Going to create demo site..." 
from invenio.config import CFG_PREFIX from invenio.dbquery import run_sql run_sql("TRUNCATE schTASK") run_sql("TRUNCATE session") run_sql("DELETE FROM user WHERE email=''") for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/democfgdata.sql" % \ (CFG_PREFIX, CFG_PREFIX),]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) cli_cmd_reset_fieldnames(conf) # needed for I18N demo ranking method names for cmd in ["%s/bin/webaccessadmin -u admin -c -r -D" % CFG_PREFIX, "%s/bin/webcoll -u admin" % CFG_PREFIX, "%s/bin/webcoll 1" % CFG_PREFIX, "%s/bin/bibsort -u admin --load-config" % CFG_PREFIX, "%s/bin/bibsort 2" % CFG_PREFIX, ]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Demo site created successfully." def cli_cmd_load_demo_records(conf): """Load demo records. Useful for testing purposes.""" from invenio.config import CFG_PREFIX from invenio.dbquery import run_sql print ">>> Going to load demo records..." run_sql("TRUNCATE schTASK") for cmd in ["%s/bin/bibupload -u admin -i %s/var/tmp/demobibdata.xml" % (CFG_PREFIX, CFG_PREFIX), "%s/bin/bibupload 1" % CFG_PREFIX, "%s/bin/bibdocfile --textify --with-ocr --recid 97" % CFG_PREFIX, "%s/bin/bibdocfile --textify --all" % CFG_PREFIX, "%s/bin/bibindex -u admin" % CFG_PREFIX, "%s/bin/bibindex 2" % CFG_PREFIX, "%s/bin/bibreformat -u admin -o HB" % CFG_PREFIX, "%s/bin/bibreformat 3" % CFG_PREFIX, "%s/bin/webcoll -u admin" % CFG_PREFIX, "%s/bin/webcoll 4" % CFG_PREFIX, "%s/bin/bibrank -u admin" % CFG_PREFIX, "%s/bin/bibrank 5" % CFG_PREFIX, "%s/bin/bibsort -u admin -R" % CFG_PREFIX, "%s/bin/bibsort 6" % CFG_PREFIX, "%s/bin/oairepositoryupdater -u admin" % CFG_PREFIX, "%s/bin/oairepositoryupdater 7" % CFG_PREFIX, "%s/bin/bibupload 8" % CFG_PREFIX,]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Demo records loaded successfully." def cli_cmd_remove_demo_records(conf): """Remove demo records. 
Useful when you are finished testing.""" print ">>> Going to remove demo records..." from invenio.config import CFG_PREFIX from invenio.dbquery import run_sql from invenio.textutils import wrap_text_in_a_box, wait_for_user wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your records and documents!""")) if os.path.exists(CFG_PREFIX + os.sep + 'var' + os.sep + 'data'): shutil.rmtree(CFG_PREFIX + os.sep + 'var' + os.sep + 'data') run_sql("TRUNCATE schTASK") for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabbibclean.sql" % (CFG_PREFIX, CFG_PREFIX), "%s/bin/webcoll -u admin" % CFG_PREFIX, "%s/bin/webcoll 1" % CFG_PREFIX,]: if os.system(cmd): print "ERROR: failed execution of", cmd sys.exit(1) print ">>> Demo records removed successfully." def cli_cmd_drop_demo_site(conf): """Drop demo site completely. Useful when you are finished testing.""" print ">>> Going to drop demo site..." from invenio.textutils import wrap_text_in_a_box, wait_for_user wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your site and documents!""")) cli_cmd_drop_tables(conf) cli_cmd_create_tables(conf) cli_cmd_remove_demo_records(conf) print ">>> Demo site dropped successfully." def cli_cmd_run_unit_tests(conf): """Run unit tests, usually on the working demo site.""" from invenio.testutils import build_and_run_unit_test_suite build_and_run_unit_test_suite() def cli_cmd_run_regression_tests(conf): """Run regression tests, usually on the working demo site.""" from invenio.testutils import build_and_run_regression_test_suite build_and_run_regression_test_suite() def cli_cmd_run_web_tests(conf): """Run web tests in a browser. Requires Firefox with Selenium.""" from invenio.testutils import build_and_run_web_test_suite build_and_run_web_test_suite() def _detect_ip_address(): """Detect IP address of this computer. Useful for creating Apache vhost conf snippet on RHEL like machines. 
@return: IP address, or '*' if cannot detect @rtype: string @note: creates socket for real in order to detect real IP address, not the loopback one. """ try: s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.connect(('invenio-software.org', 0)) return s.getsockname()[0] except: return '*' def cli_cmd_create_apache_conf(conf): """ Create Apache conf files for this site, keeping previous files in a backup copy. """ print ">>> Going to create Apache conf files..." from invenio.textutils import wrap_text_in_a_box from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO apache_conf_dir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'apache' ## Preparation of XSendFile directive xsendfile_directive_needed = int(conf.get("Invenio", 'CFG_BIBDOCFILE_USE_XSENDFILE')) != 0 if xsendfile_directive_needed: xsendfile_directive = "XSendFile On\n" else: xsendfile_directive = "#XSendFile On\n" for path in (conf.get('Invenio', 'CFG_WEBSUBMIT_FILEDIR'), # BibDocFile conf.get('Invenio', 'CFG_WEBDIR'), conf.get('Invenio', 'CFG_WEBSUBMIT_STORAGEDIR'), # WebSubmit conf.get('Invenio', 'CFG_TMPDIR'), os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'tmp', 'attachfile'), os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'comments'), os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'baskets', 'comments'), '/tmp'): # BibExport if xsendfile_directive_needed: xsendfile_directive += ' XSendFilePath %s\n' % path else: xsendfile_directive += ' #XSendFilePath %s\n' % path xsendfile_directive = xsendfile_directive.strip() ## Preparation of deflate directive deflate_directive_needed = int(conf.get("Invenio", 'CFG_WEBSTYLE_HTTP_USE_COMPRESSION')) != 0 if deflate_directive_needed: deflate_directive = r""" ## Configuration snippet taken from: ## <http://httpd.apache.org/docs/2.2/mod/mod_deflate.html> <IfModule mod_deflate.c> SetOutputFilter DEFLATE # Netscape 4.x has some problems... 
BrowserMatch ^Mozilla/4 gzip-only-text/html # Netscape 4.06-4.08 have some more problems BrowserMatch ^Mozilla/4\.0[678] no-gzip # MSIE masquerades as Netscape, but it is fine # BrowserMatch \bMSIE !no-gzip !gzip-only-text/html # NOTE: Due to a bug in mod_setenvif up to Apache 2.0.48 # the above regex won't work. You can use the following # workaround to get the desired effect: BrowserMatch \bMSI[E] !no-gzip !gzip-only-text/html # Don't compress images SetEnvIfNoCase Request_URI \ \.(?:gif|jpe?g|png)$ no-gzip dont-vary # Make sure proxies don't deliver the wrong content <IfModule mod_header.c> Header append Vary User-Agent env=!dont-vary </IfModule> </IfModule> """ else: deflate_directive = "" if CFG_EXTERNAL_AUTH_USING_SSO: shibboleth_directive = r""" <Location ~ "/youraccount/login|Shibboleth.sso/"> SSLRequireSSL # The modules only work using HTTPS AuthType shibboleth ShibRequireSession On ShibRequireAll On ShibExportAssertion Off require valid-user </Location> """ else: shibboleth_directive = "" ## Apache vhost conf file is distro specific, so analyze needs: # Gentoo (and generic defaults): listen_directive_needed = True ssl_pem_directive_needed = False ssl_pem_path = '/etc/apache2/ssl/apache.pem' ssl_crt_path = '/etc/apache2/ssl/server.crt' ssl_key_path = '/etc/apache2/ssl/server.key' vhost_ip_address_needed = False wsgi_socket_directive_needed = False # Debian: if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'debian_version'): listen_directive_needed = False ssl_pem_directive_needed = True # RHEL/SLC: if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'redhat-release'): listen_directive_needed = False ssl_crt_path = '/etc/pki/tls/certs/localhost.crt' ssl_key_path = '/etc/pki/tls/private/localhost.key' vhost_ip_address_needed = True wsgi_socket_directive_needed = True # maybe we are using non-standard ports? 
vhost_site_url = conf.get('Invenio', 'CFG_SITE_URL').replace("http://", "") if vhost_site_url.startswith("https://"): ## The installation is configured to require HTTPS for any connection vhost_site_url = vhost_site_url.replace("https://", "") vhost_site_url_port = '80' vhost_site_secure_url = conf.get('Invenio', 'CFG_SITE_SECURE_URL').replace("https://", "") vhost_site_secure_url_port = '443' if ':' in vhost_site_url: vhost_site_url, vhost_site_url_port = vhost_site_url.split(':', 1) if ':' in vhost_site_secure_url: vhost_site_secure_url, vhost_site_secure_url_port = vhost_site_secure_url.split(':', 1) if vhost_site_url_port != '80' or vhost_site_secure_url_port != '443': listen_directive_needed = True ## OK, let's create Apache vhost files: if not os.path.exists(apache_conf_dir): os.mkdir(apache_conf_dir) apache_vhost_file = apache_conf_dir + os.sep + \ 'invenio-apache-vhost.conf' apache_vhost_ssl_file = apache_conf_dir + os.sep + \ 'invenio-apache-vhost-ssl.conf' apache_vhost_body = """\ AddDefaultCharset UTF-8 ServerSignature Off ServerTokens Prod NameVirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s %(listen_directive)s %(wsgi_socket_directive)s WSGIRestrictStdout Off <Files *.pyc> deny from all </Files> <Files *~> deny from all </Files> <VirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s> ServerName %(servername)s ServerAlias %(serveralias)s ServerAdmin %(serveradmin)s DocumentRoot %(webdir)s <Directory %(webdir)s> Options FollowSymLinks MultiViews AllowOverride None Order allow,deny Allow from all </Directory> ErrorLog %(logdir)s/apache.err LogLevel warn CustomLog %(logdir)s/apache.log combined DirectoryIndex index.en.html index.html Alias /img/ %(webdir)s/img/ Alias /js/ %(webdir)s/js/ Alias /flash/ %(webdir)s/flash/ Alias /css/ %(webdir)s/css/ Alias /export/ %(webdir)s/export/ Alias /MathJax/ %(webdir)s/MathJax/ Alias /jsCalendar/ %(webdir)s/jsCalendar/ Alias /ckeditor/ %(webdir)s/ckeditor/ Alias /mediaelement/ %(webdir)s/mediaelement/ 
AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1 Alias /robots.txt %(webdir)s/robots.txt Alias /favicon.ico %(webdir)s/favicon.ico WSGIDaemonProcess invenio processes=5 threads=1 display-name=%%{GROUP} inactivity-timeout=3600 maximum-requests=10000 WSGIImportScript %(wsgidir)s/invenio.wsgi process-group=invenio application-group=%%{GLOBAL} WSGIScriptAlias / %(wsgidir)s/invenio.wsgi WSGIPassAuthorization On %(xsendfile_directive)s <Directory %(wsgidir)s> WSGIProcessGroup invenio WSGIApplicationGroup %%{GLOBAL} Options FollowSymLinks MultiViews AllowOverride None Order allow,deny Allow from all </Directory> %(deflate_directive)s </VirtualHost> """ % {'vhost_site_url_port': vhost_site_url_port, 'servername': vhost_site_url, 'serveralias': vhost_site_url.split('.')[0], 'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'), 'webdir': conf.get('Invenio', 'CFG_WEBDIR'), 'logdir': conf.get('Invenio', 'CFG_LOGDIR'), 'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'), 'wsgidir': os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'), 'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*', 'listen_directive': listen_directive_needed and 'Listen ' + vhost_site_url_port or \ '#Listen ' + vhost_site_url_port, 'wsgi_socket_directive': (wsgi_socket_directive_needed and \ 'WSGISocketPrefix ' or '#WSGISocketPrefix ') + \ conf.get('Invenio', 'CFG_PREFIX') + os.sep + 'var' + os.sep + 'run', 'xsendfile_directive' : xsendfile_directive, 'deflate_directive': deflate_directive, } apache_vhost_ssl_body = """\ ServerSignature Off ServerTokens Prod %(listen_directive)s NameVirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s %(ssl_pem_directive)s %(ssl_crt_directive)s %(ssl_key_directive)s WSGIRestrictStdout Off <Files *.pyc> deny from all </Files> <Files *~> deny from all </Files> <VirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s> ServerName %(servername)s ServerAlias %(serveralias)s ServerAdmin %(serveradmin)s SSLEngine on 
DocumentRoot %(webdir)s <Directory %(webdir)s> Options FollowSymLinks MultiViews AllowOverride None Order allow,deny Allow from all </Directory> ErrorLog %(logdir)s/apache-ssl.err LogLevel warn CustomLog %(logdir)s/apache-ssl.log combined DirectoryIndex index.en.html index.html Alias /img/ %(webdir)s/img/ Alias /js/ %(webdir)s/js/ Alias /flash/ %(webdir)s/flash/ Alias /css/ %(webdir)s/css/ Alias /export/ %(webdir)s/export/ Alias /MathJax/ %(webdir)s/MathJax/ Alias /jsCalendar/ %(webdir)s/jsCalendar/ Alias /ckeditor/ %(webdir)s/ckeditor/ Alias /mediaelement/ %(webdir)s/mediaelement/ AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1 Alias /robots.txt %(webdir)s/robots.txt Alias /favicon.ico %(webdir)s/favicon.ico RedirectMatch /sslredirect/(.*) http://$1 WSGIScriptAlias / %(wsgidir)s/invenio.wsgi WSGIPassAuthorization On %(xsendfile_directive)s <Directory %(wsgidir)s> WSGIProcessGroup invenio WSGIApplicationGroup %%{GLOBAL} Options FollowSymLinks MultiViews AllowOverride None Order allow,deny Allow from all </Directory> %(deflate_directive)s %(shibboleth_directive)s </VirtualHost> """ % {'vhost_site_secure_url_port': vhost_site_secure_url_port, 'servername': vhost_site_secure_url, 'serveralias': vhost_site_secure_url.split('.')[0], 'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'), 'webdir': conf.get('Invenio', 'CFG_WEBDIR'), 'logdir': conf.get('Invenio', 'CFG_LOGDIR'), 'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'), 'wsgidir' : os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'), 'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*', 'listen_directive' : listen_directive_needed and 'Listen ' + vhost_site_secure_url_port or \ '#Listen ' + vhost_site_secure_url_port, 'ssl_pem_directive': ssl_pem_directive_needed and \ 'SSLCertificateFile %s' % ssl_pem_path or \ '#SSLCertificateFile %s' % ssl_pem_path, 'ssl_crt_directive': ssl_pem_directive_needed and \ '#SSLCertificateFile %s' % ssl_crt_path or \ 'SSLCertificateFile %s' % 
ssl_crt_path, 'ssl_key_directive': ssl_pem_directive_needed and \ '#SSLCertificateKeyFile %s' % ssl_key_path or \ 'SSLCertificateKeyFile %s' % ssl_key_path, 'xsendfile_directive' : xsendfile_directive, 'deflate_directive': deflate_directive, 'shibboleth_directive': shibboleth_directive, } # write HTTP vhost snippet: if os.path.exists(apache_vhost_file): shutil.copy(apache_vhost_file, apache_vhost_file + '.OLD') fdesc = open(apache_vhost_file, 'w') fdesc.write(apache_vhost_body) fdesc.close() print print "Created file", apache_vhost_file # write HTTPS vhost snippet: vhost_ssl_created = False if conf.get('Invenio', 'CFG_SITE_SECURE_URL').startswith("https://"): if os.path.exists(apache_vhost_ssl_file): shutil.copy(apache_vhost_ssl_file, apache_vhost_ssl_file + '.OLD') fdesc = open(apache_vhost_ssl_file, 'w') fdesc.write(apache_vhost_ssl_body) fdesc.close() vhost_ssl_created = True print "Created file", apache_vhost_ssl_file print wrap_text_in_a_box("""\ Apache virtual host configuration file(s) for your Invenio site was(were) created. Please check created file(s) and activate virtual host(s). For example, you can put the following include statements in your httpd.conf:\n Include %s %s Please see the INSTALL file for more details. """ % (apache_vhost_file, (vhost_ssl_created and 'Include ' or '#Include ') + apache_vhost_ssl_file)) print ">>> Apache conf files created." def cli_cmd_get(conf, varname): """ Return value of VARNAME read from CONF files. Useful for third-party programs to access values of conf options such as CFG_PREFIX. Return None if VARNAME is not found. """ # do not pay attention to upper/lower case: varname = varname.lower() # do not pay attention to section names yet: all_options = {} for section in conf.sections(): for option in conf.options(section): all_options[option] = conf.get(section, option) return all_options.get(varname, None) def cli_cmd_list(conf): """ Print a list of all conf options and values from CONF. 
""" sections = conf.sections() sections.sort() for section in sections: options = conf.options(section) options.sort() for option in options: print option.upper(), '=', conf.get(section, option) def _grep_version_from_executable(path_to_exec, version_regexp): """ Try to detect a program version by digging into its binary PATH_TO_EXEC and looking for VERSION_REGEXP. Return program version as a string. Return empty string if not succeeded. """ from invenio.shellutils import run_shell_command exec_version = "" if os.path.exists(path_to_exec): dummy1, cmd2_out, dummy2 = run_shell_command("strings %s | grep %s", (path_to_exec, version_regexp)) if cmd2_out: for cmd2_out_line in cmd2_out.split("\n"): if len(cmd2_out_line) > len(exec_version): # the longest the better exec_version = cmd2_out_line return exec_version def detect_apache_version(): """ Try to detect Apache version by localizing httpd or apache executables and grepping inside binaries. Return list of all found Apache versions and paths. (For a given executable, the returned format is 'apache_version [apache_path]'.) Return empty list if no success. """ from invenio.shellutils import run_shell_command out = [] dummy1, cmd_out, dummy2 = run_shell_command("locate bin/httpd bin/apache") for apache in cmd_out.split("\n"): apache_version = _grep_version_from_executable(apache, '^Apache\/') if apache_version: out.append("%s [%s]" % (apache_version, apache)) return out def cli_cmd_detect_system_details(conf): """ Detect and print system details such as Apache/Python/MySQL versions etc. Useful for debugging problems on various OS. """ import MySQLdb print ">>> Going to detect system details..." 
print "* Hostname: " + socket.gethostname() print "* Invenio version: " + conf.get("Invenio", "CFG_VERSION") print "* Python version: " + sys.version.replace("\n", " ") print "* Apache version: " + ";\n ".join(detect_apache_version()) print "* MySQLdb version: " + MySQLdb.__version__ try: from invenio.dbquery import run_sql print "* MySQL version:" for key, val in run_sql("SHOW VARIABLES LIKE 'version%'") + \ run_sql("SHOW VARIABLES LIKE 'charact%'") + \ run_sql("SHOW VARIABLES LIKE 'collat%'"): if False: print " - %s: %s" % (key, val) elif key in ['version', 'character_set_client', 'character_set_connection', 'character_set_database', 'character_set_results', 'character_set_server', 'character_set_system', 'collation_connection', 'collation_database', 'collation_server']: print " - %s: %s" % (key, val) except ImportError: print "* ERROR: cannot import dbquery" print ">>> System details detected successfully." def main(): """Main entry point.""" conf = ConfigParser() if '--help' in sys.argv or \ '-h' in sys.argv: print_usage() elif '--version' in sys.argv or \ '-V' in sys.argv: print_version() else: confdir = None if '--conf-dir' in sys.argv: try: confdir = sys.argv[sys.argv.index('--conf-dir') + 1] except IndexError: pass # missing --conf-dir argument value if not os.path.exists(confdir): print "ERROR: bad or missing --conf-dir option value." 
sys.exit(1) else: ## try to detect path to conf dir (relative to this bin dir): confdir = re.sub(r'/bin$', '/etc', sys.path[0]) ## read conf files: for conffile in [confdir + os.sep + 'invenio.conf', confdir + os.sep + 'invenio-autotools.conf', confdir + os.sep + 'invenio-local.conf',]: if os.path.exists(conffile): conf.read(conffile) else: if not conffile.endswith("invenio-local.conf"): # invenio-local.conf is optional, otherwise stop print "ERROR: Badly guessed conf file location", conffile print "(Please use --conf-dir option.)" sys.exit(1) ## decide what to do: done = False for opt_idx in range(0, len(sys.argv)): opt = sys.argv[opt_idx] if opt == '--conf-dir': # already treated before, so skip silently: pass elif opt == '--get': try: varname = sys.argv[opt_idx + 1] except IndexError: print "ERROR: bad or missing --get option value." sys.exit(1) if varname.startswith('-'): print "ERROR: bad or missing --get option value." sys.exit(1) varvalue = cli_cmd_get(conf, varname) if varvalue is not None: print varvalue else: sys.exit(1) done = True elif opt == '--list': cli_cmd_list(conf) done = True elif opt == '--detect-system-details': cli_cmd_detect_system_details(conf) done = True elif opt == '--create-tables': cli_cmd_create_tables(conf) done = True elif opt == '--load-webstat-conf': cli_cmd_load_webstat_conf(conf) done = True elif opt == '--drop-tables': cli_cmd_drop_tables(conf) done = True elif opt == '--check-openoffice': cli_check_openoffice(conf) done = True elif opt == '--create-demo-site': cli_cmd_create_demo_site(conf) done = True elif opt == '--load-demo-records': cli_cmd_load_demo_records(conf) done = True elif opt == '--remove-demo-records': cli_cmd_remove_demo_records(conf) done = True elif opt == '--drop-demo-site': cli_cmd_drop_demo_site(conf) done = True elif opt == '--run-unit-tests': cli_cmd_run_unit_tests(conf) done = True elif opt == '--run-regression-tests': cli_cmd_run_regression_tests(conf) done = True elif opt == '--run-web-tests': 
cli_cmd_run_web_tests(conf) done = True elif opt == '--update-all': cli_cmd_update_config_py(conf) cli_cmd_update_dbquery_py(conf) cli_cmd_update_dbexec(conf) cli_cmd_update_bibconvert_tpl(conf) cli_cmd_update_web_tests(conf) done = True elif opt == '--update-config-py': cli_cmd_update_config_py(conf) done = True elif opt == '--update-dbquery-py': cli_cmd_update_dbquery_py(conf) done = True elif opt == '--update-dbexec': cli_cmd_update_dbexec(conf) done = True elif opt == '--update-bibconvert-tpl': cli_cmd_update_bibconvert_tpl(conf) done = True elif opt == '--update-web-tests': cli_cmd_update_web_tests(conf) done = True elif opt == '--reset-all': cli_cmd_reset_sitename(conf) cli_cmd_reset_siteadminemail(conf) cli_cmd_reset_fieldnames(conf) cli_cmd_reset_recstruct_cache(conf) done = True elif opt == '--reset-sitename': cli_cmd_reset_sitename(conf) done = True elif opt == '--reset-siteadminemail': cli_cmd_reset_siteadminemail(conf) done = True elif opt == '--reset-fieldnames': cli_cmd_reset_fieldnames(conf) done = True elif opt == '--reset-recstruct-cache': cli_cmd_reset_recstruct_cache(conf) done = True elif opt == '--create-apache-conf': cli_cmd_create_apache_conf(conf) done = True elif opt.startswith("-") and opt != '--yes-i-know': print "ERROR: unknown option", opt sys.exit(1) if not done: print """ERROR: Please specify a command. Please see '--help'.""" sys.exit(1) if __name__ == '__main__': main()