diff --git a/config/invenio.conf b/config/invenio.conf index 51b9500a6..509ce0081 100644 --- a/config/invenio.conf +++ b/config/invenio.conf @@ -1,567 +1,572 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ################################################### ## About 'invenio.conf' and 'invenio-local.conf' ## ################################################### ## The 'invenio.conf' file contains the vanilla default configuration ## parameters of a CDS Invenio installation, as coming from the ## distribution. The file should be self-explanatory. Once installed ## in its usual location (usually /opt/cds-invenio/etc), you could in ## principle go ahead and change the values according to your local ## needs. ## ## However, you can also create a file named 'invenio-local.conf' in ## the same directory where 'invenio.conf' lives and put there only ## the localizations you need to have different from the default ones. 
## For example: ## ## $ cat /opt/cds-invenio/etc/invenio-local.conf ## [Invenio] ## CFG_SITE_URL = http://your.site.com ## CFG_SITE_SECURE_URL = https://your.site.com ## CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com ## CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com ## ## The Invenio system will then read both the default invenio.conf ## file and your customized invenio-local.conf file and it will ## override any default options with the ones you have set in your ## local file. This cascading of configuration parameters will ease ## you future upgrades. [Invenio] ################################### ## Part 1: Essential parameters ## ################################### ## This part defines essential CDS Invenio internal parameters that ## everybody should override, like the name of the server or the email ## address of the local CDS Invenio administrator. ## CFG_DATABASE_* - specify which MySQL server to use, the name of the ## database to use, and the database access credentials. CFG_DATABASE_HOST = localhost CFG_DATABASE_NAME = cdsinvenio CFG_DATABASE_USER = cdsinvenio CFG_DATABASE_PASS = my123p$ss ## CFG_SITE_URL - specify URL under which your installation will be ## visible. For example, use "http://your.site.com". Do not leave ## trailing slash. CFG_SITE_URL = http://localhost ## CFG_SITE_SECURE_URL - specify secure URL under which your ## installation secure pages such as login or registration will be ## visible. For example, use "https://your.site.com". Do not leave ## trailing slash. If you don't plan on using HTTPS, then you may ## leave this empty. CFG_SITE_SECURE_URL = https://localhost ## CFG_SITE_NAME -- the visible name of your CDS Invenio installation. CFG_SITE_NAME = Atlantis Institute of Fictive Science ## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME ## in various languages. (See also CFG_SITE_LANGS below.) 
CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia CFG_SITE_NAME_INTL_ru = Атлантис Институт фиктивных Наук CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会 CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院 CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院 ## CFG_SITE_LANG -- the default language of the interface: CFG_SITE_LANG = en ## CFG_SITE_LANGS -- list of all languages the user interface should ## be available in, separated by commas. The order specified below ## will be respected on the interface pages. A good default would be ## to use the alphabetical order. Currently supported languages ## include Bulgarian, Catalan, Czech, German, Greek, English, Spanish, ## French, Italian, Japanese, Norwegian, Polish, Portuguese, Russian, ## Slovak, Swedish, and Ukrainian, Chinese (China), Chinese (Taiwan), ## so that the current eventual maximum you can currently select is ## "bg,ca,cs,de,el,en,es,fr,hr,it,ja,no,pl,pt,ru,sk,sv,uk,zh_CN,zh_TW". 
CFG_SITE_LANGS = bg,ca,cs,de,el,en,es,fr,hr,it,ja,no,pl,pt,ru,sk,sv,uk,zh_CN,zh_TW ## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for ## this installation: CFG_SITE_SUPPORT_EMAIL = cds.support@cern.ch ## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for ## this installation. Enter your email address below and login with ## this address when using CDS Invenio administration modules. You ## will then be automatically recognized as superuser of the system. CFG_SITE_ADMIN_EMAIL = cds.support@cern.ch ## CFG_MAX_CACHED_QUERIES -- maximum cached queries number possible ## after reaching this number of cached queries the cache is pruned -## deleting half of the older accessed cached queries. +## deleting half of the older accessed cached queries. CFG_MAX_CACHED_QUERIES = 10000 # FIXME: change name to express SQL queries ## CFG_MISCUTIL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool in ## the DB engine of CDS Invenio. It is okay to enable this flag even ## if you have not installed SQLAlchemy. Note that Invenio will loose ## some perfomance if CFG_MISCUTIL_USE_SQLALCHEMY is enabled. CFG_MISCUTIL_USE_SQLALCHEMY = False ## CFG_MISCUTIL_SMTP_HOST -- which server to use as outgoing mail server to ## send outgoing emails generated by the system, for example concerning ## submissions or email notification alerts. CFG_MISCUTIL_SMTP_HOST = localhost ## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server ## defined in the previous step. CFG_MISCUTIL_SMTP_PORT = 25 ## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials ## are stored. Must be an absolute pathname. If the value does not ## start by a slash, it is considered to be the filename of a file ## located under prefix/var/tmp directory. This is useful for the ## demo site testing purposes. 
For the production site, if you plan ## to restrict access to some collections based on the Apache user ## authentication mechanism, you should put here an absolute path to ## your Apache password file. CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords ## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are ## defined. See the documentation of the preceding config variable. CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups ## CFG_CERN_SITE -- do we want to enable CERN-specific code, like the ## one that proposes links to famous HEP sites such as Spires and KEK? ## Put "1" for "yes" and "0" for "no". CFG_CERN_SITE = 0 ################################ ## Part 2: Web page style ## ################################ ## The variables affecting the page style. The most important one is ## the 'template skin' you would like to use. Please refer to the ## WebStyle Admin Guide for more explanation. The other variables are ## listed here mostly for backwards compatibility purposes only. ## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to ## use? CFG_WEBSTYLE_TEMPLATE_SKIN = default ## CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML left top box: -CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = +CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP = ## CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global HTML left bottom box: -CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = +CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM = ## CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global HTML right top box: -CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = +CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP = ## CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global HTML right bottom box: -CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = +CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM = ################################## ## Part 3: WebSearch parameters ## ################################## ## This section contains some configuration parameters for WebSearch ## module. 
Please note that WebSearch is mostly configured on ## run-time via its WebSearch Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. (Note that you may modify them ## afterwards too, though.) ## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries we want to ## cache in memory per one Apache httpd process? This cache is used ## mainly for "next/previous page" functionality, but it caches also ## "popular" user queries if more than one user happen to search for ## the same thing. Note that large numbers may lead to great memory ## consumption. We recommend a value not greater than 100. CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 100 ## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older ## system, you may want to map field codes of your old system (such as ## 'ti') to CDS Invenio/MySQL ("title"). Use Python dictionary syntax ## for the translation table, e.g. {'wau':'author', 'wti':'title'}. ## Usually you don't want to do that, and you would use empty dict {}. CFG_WEBSEARCH_FIELDS_CONVERT = {} ## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search ## pattern window in the simple search interface, in characters. CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40 ## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the ## search pattern window in the advanced search interface, in ## characters. CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30 ## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still ## want to sort? For higher numbers we print only a warning and won't ## perform any sorting other than default 'latest records first', as ## sorting would be very time consuming then. We recommend a value of ## not more than a couple of thousands. CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000 ## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but ## it was not preformatted in the "HTML brief" format, do we want to ## call BibFormatting on the fly? 
Put "1" for "yes" and "0" for "no". ## Note that "1" will display the record exactly as if it were fully ## preformatted, but it may be slow due to on-the-fly processing; "0" ## will display a default format very fast, but it may not have all ## the fields as in the fully preformatted HTML brief format. Note ## also that this option is active only for old (PHP) formats; the new ## (Python) formats are called on the fly by default anyway, since ## they are much faster. When unsure, please set "0" here. CFG_WEBSEARCH_CALL_BIBFORMAT = 0 ## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs ## visible rather than MySQL's record IDs? You may use this if you ## migrate from a different e-doc system, and you store your old ## system numbers into 970__a. Put "1" for "yes" and "0" for ## "no". Usually you don't want to do that, though. CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0 ## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- Put "1" if you want the ## "Latest Additions" in the web collection pages to show ## internationalized records. Useful only if your brief BibFormat ## templates contain internationalized strings. Otherwise put "0" in ## order not to slow down the creation of latest additions by WebColl. CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0 ## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display ## under 'Latest Additions' in the web collection pages. CFG_WEBSEARCH_INSTANT_BROWSE = 10 ## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to ## display in the RSS feed. CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25 ## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long ## a feed cache is valid. CFG_WEBSEARCH_RSS_TTL = 360 ## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of requests kept ## in cache. If the cache is filled, subsequent requests are not cached. CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000 ## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names ## to print explicitly; for more print "et al". 
Note that this is ## used in default formatting that is seldom used, as usually ## BibFormat defines all the format. The value below is only used ## when BibFormat fails, for example. CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3 ## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show or ## not collection grandsons in Narrow Search boxes (sons are shown by ## default, grandsons are configurable here). Use 0 for no and 1 for ## yes. CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1 ## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we ## create help links for Ellis, Nick or Ellis, Nicholas and friends ## when Ellis, N was searched for? Useful if you have one author ## stored in the database under several name formats, namely surname ## comma firstname and surname comma initial cataloging policy. Use 0 ## for no and 1 for yes. CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1 ## CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS -- jsMath is a Javascript ## library that renders (La)TeX mathematical formulas in the client ## browser. This parameter must contain a list of output formats for ## which to apply jsMath rendering, for example "['hd', 'hb']". If ## the list is empty, jsMath is disabled. CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS = [] ####################################### ## Part 4: BibHarvest OAI parameters ## ####################################### ## This part defines parameters for the CDS Invenio OAI gateway. ## Useful if you are running CDS Invenio as an OAI data provider. ## CFG_OAI_ID_FIELD -- OAI identifier MARC field: CFG_OAI_ID_FIELD = 909COo ## CFG_OAI_SET_FIELD -- OAI set MARC field: CFG_OAI_SET_FIELD = 909COp ## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy ## (no/transient/persistent). 
CFG_OAI_DELETED_POLICY = no ## CFG_OAI_ID_PREFIX -- OAI identifier prefix: CFG_OAI_ID_PREFIX = atlantis.cern.ch ## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier: CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:CERN-TH-4036 ## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb: CFG_OAI_IDENTIFY_DESCRIPTION = oai atlantis.cern.ch : oai:atlantis.cern.ch:CERN-TH-4036 http://atlantis.cern.ch/ Free and unlimited use by anybody with obligation to refer to original record Full content, i.e. preprints may not be harvested by robots Submission restricted. Submitted documents are subject of approval by OAI repository admins. ## CFG_OAI_LOAD -- OAI number of records in a response: CFG_OAI_LOAD = 1000 ## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time: CFG_OAI_EXPIRE = 90000 ## CFG_OAI_SLEEP -- service unavailable between two consecutive ## requests for CFG_OAI_SLEEP seconds: CFG_OAI_SLEEP = 10 ################################## ## Part 5: WebSubmit parameters ## ################################## ## This section contains some configuration parameters for WebSubmit ## module. Please note that WebSubmit is mostly configured on ## run-time via its WebSubmit Admin web interface. The parameters ## below are the ones that you do not probably want to modify during ## the runtime. ## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext ## documents are stored under "/opt/cds-invenio/var/data/files/gX/Y" ## directories where X is 0,1,... and Y stands for bibdoc ID. Thusly ## documents Y are grouped into directories X and this variable ## indicates the maximum number of documents Y stored in each ## directory X. This limit is imposed solely for filesystem ## performance reasons in order not to have too many subdirectories in ## a given directory. 
CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000 +## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a list of document +## extensions not listed in Python standard mimetype library that should +## be recognized by Invenio. +CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = ['hpg', 'link', 'lis', 'llb', 'mat', 'mpp', 'msg'] + ################################# ## Part 6: BibIndex parameters ## ################################# ## This section contains some configuration parameters for BibIndex ## module. Please note that BibIndex is mostly configured on run-time ## via its BibIndex Admin web interface. The parameters below are the ## ones that you do not probably want to modify very often during the ## runtime. ## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do ## you want to index locally stored files only, or also external URLs? ## Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 0 ## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove ## stopwords? Use "0" to say "no" and "1" to say "yes". CFG_BIBINDEX_REMOVE_STOPWORDS = 0 ## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as ## alphanumeric separators of word-blocks inside words. You probably ## don't want to change this. CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ # FIXME: maybe remove backslashes ## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation ## between word-blocks inside words. You probably don't want to ## change this. CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\"\(\)\'\`\<\> # FIXME: maybe remove backslashes ## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup ## before indexing? Use 1 if you have HTML markup inside metadata ## (e.g. in abstracts), use 0 otherwise. 
CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0 ## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LATEX markup ## before indexing? Use 1 if you have LATEX markup inside metadata ## (e.g. in abstracts), use 0 otherwise. CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0 ## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to ## index. The terms shorter than this length will be discarded. ## Useful to keep the database clean, however you can safely leave ## this value on 0 for up to 1,000,000 documents. CFG_BIBINDEX_MIN_WORD_LENGTH = 0 ## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD -- ## access credentials to access restricted URLs, interesting only if ## you are fulltext-indexing files located on a remote server that is ## only available via username/password. But it's probably better to ## handle this case via IP or some convention; the current scheme is ## mostly there for demo only. CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass ## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- ## Enable sanity checks for integers passed to the intbitset data ## structures. It is good to enable this during debugging ## and to disable this value for speed improvements. CFG_INTBITSET_ENABLE_SANITY_CHECKS = False ####################################### ## Part 7: Access control parameters ## ####################################### ## This section contains some configuration parameters for the access ## control system. Please note that WebAccess is mostly configured on ## run-time via its WebAccess Admin web interface. The parameters ## below are the ones that you do not probably want to modify very ## often during the runtime. (If you do want to modify them during ## runtime, for example to deny access temporarily because of backups, ## you can edit access_control_config.py directly, no need to get back ## here and no need to redo the make process.) 
## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is. ## Use 0 for normal operation of the site, 1 for read-only site (all ## write operations temporarily closed), 2 for site fully closed. ## Useful for site maintenance. CFG_ACCESS_CONTROL_LEVEL_SITE = 0 ## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy. Use ## 0 to allow guest users, 1 not to allow them (all users must login). CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0 ## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and ## activation policy. When 0, users can register and accounts are ## automatically activated. When 1, users can register but admin must ## activate the accounts. When 2, users cannot register nor update ## their email address, only admin can register accounts. When 3, ## users cannot register nor update email address nor password, only ## admin can register accounts. When 4, the same as 3 applies, nor ## user cannot change his login method. CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account ## registration to certain email addresses? If wanted, give domain ## name below, e.g. "cern.ch". If not wanted, leave it empty. -CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = +CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a ## notification email to the administrator when a new account is ## created? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a ## notification email to the user when a new account is created in order to ## to verify the validity of the provided email address? Use ## 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a ## notification email to the user when a new account is activated? ## Use 0 for no, 1 for yes. 
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0 ## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a ## notification email to the user when a new account is deleted or ## account demand rejected? Use 0 for no, 1 for yes. CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0 ############################### ## FIXME: Undocumented ones: ## ############################### ## BibRank: CFG_BIBRANK_SHOW_READING_STATS = 1 CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1 CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1 CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0 CFG_BIBRANK_SHOW_CITATION_LINKS = 1 CFG_BIBRANK_SHOW_CITATION_STATS = 1 CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1 ## WebComment: CFG_WEBCOMMENT_ALLOW_COMMENTS = 1 CFG_WEBCOMMENT_ALLOW_REVIEWS = 1 CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0 CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5 CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1 CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1 CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1 CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20 CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20 # FIXME: not found in modules subdir?! -CFG_WEBCOMMENT_TIMELIMIT_VOTE_VALIDITY_IN_DAYS = 365 +CFG_WEBCOMMENT_TIMELIMIT_VOTE_VALIDITY_IN_DAYS = 365 # FIXME: not found in modules subdir?! CFG_WEBCOMMENT_TIMELIMIT_REPORT_VALIDITY_IN_DAYS = 100 ## BibSched: CFG_BIBSCHED_REFRESHTIME = 5 # CFG_BIBSCHED_LOG_PAGER = "/bin/more" -CFG_BIBSCHED_LOG_PAGER = None +CFG_BIBSCHED_LOG_PAGER = None ## WebAlert: ## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be send: CFG_WEBALERT_ALERT_ENGINE_EMAIL = cds.alert@cdsdev.cern.ch ########################## ## THAT's ALL, FOLKS! 
## ########################## \ No newline at end of file diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py index 0b565614f..d47483715 100644 --- a/modules/miscutil/lib/inveniocfg.py +++ b/modules/miscutil/lib/inveniocfg.py @@ -1,940 +1,941 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio configuration and administration CLI tool. 
Usage: inveniocfg [options] General options: -h, --help print this help -V, --version print version number Options to finish your installation: --create-apache-conf create Apache configuration files --create-tables create DB tables for Invenio --drop-tables drop DB tables of Invenio Options to set up and test a demo site: --create-demo-site create demo site --load-demo-records load demo records --remove-demo-records remove demo records, keeping demo site --drop-demo-site drop demo site configurations too --run-unit-tests run unit test suite (need DB connectivity) --run-regression-tests run regression test suite (need demo site) Options to update config files in situ: --update-all perform all the update options --update-config-py update config.py file from invenio.conf file --update-dbquery-py update dbquery.py with DB credentials from invenio.conf --update-dbexec update dbexec with DB credentials from invenio.conf --update-bibconvert-tpl update bibconvert templates with CFG_SITE_URL from invenio.conf Options to update DB tables: --reset-all perform all the reset options --reset-sitename reset tables to take account of new CFG_SITE_NAME* --reset-siteadminemail reset tables to take account of new CFG_SITE_ADMIN_EMAIL --reset-fieldnames reset tables to take account of new I18N names from PO files Options to help the work: --list print names and values of all options from conf files --get get value of a given option from conf files --conf-dir path to directory where invenio*.conf files are [optional] --detect-system-details print system details such as Apache/Python/MySQL versions """ __revision__ = "$Id$" from ConfigParser import ConfigParser import os import re import shutil import socket import sys import tempfile def print_usage(): """Print help.""" print __doc__ def print_version(): """Print version information.""" print __revision__ def run_command(cmd): """ Run operating system command CMD (assumed to be washed already) and return tuple (exit status code, out 
stream, err stream). """ cmd_out = '' cmd_err = '' file_cmd_out = tempfile.mkstemp("inveniocfg-cmd-out")[1] file_cmd_err = tempfile.mkstemp("inveniocfg-cmd-err")[1] cmd_exit_code = os.system("%s > %s 2> %s" % (cmd, file_cmd_out, file_cmd_err)) if os.path.exists(file_cmd_out): cmd_out = open(file_cmd_out).read() os.remove(file_cmd_out) if os.path.exists(file_cmd_err): cmd_err = open(file_cmd_err).read() os.remove(file_cmd_err) return cmd_exit_code, cmd_out, cmd_err def convert_conf_option(option_name, option_value): """ Convert conf option into Python config.py line, converting values to ints or strings as appropriate. """ ## 1) convert option name to uppercase: option_name = option_name.upper() ## 2) convert option value to int or string: try: option_value = int(option_value) except ValueError: option_value = '"' + option_value + '"' ## 3a) special cases: regexps if option_name in ['CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS', 'CFG_BIBINDEX_CHARS_PUNCTUATION']: option_value = 'r"[' + option_value[1:-1] + ']"' ## 3b) special cases: True, False, None if option_value in ['"True"', '"False"', '"None"']: option_value = option_value[1:-1] ## 3c) special cases: dicts or lists if option_name in ['CFG_WEBSEARCH_FIELDS_CONVERT', - 'CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS']: + 'CFG_WEBSEARCH_USE_JSMATH_FOR_FORMATS', + 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS']: option_value = option_value[1:-1] ## 3d) special cases: CFG_SITE_LANGS if option_name == 'CFG_SITE_LANGS': out = "[" for lang in option_value[1:-1].split(","): out += "'%s', " % lang out += "]" option_value = out ## 3e) special cases: multiline if option_name == 'CFG_OAI_IDENTIFY_DESCRIPTION': # make triple quotes option_value = '""' + option_value + '""' ## 3f) ignore some options: if option_name == 'CFG_SITE_NAME_INTL': # treated elsewhere return ## 4) finally, return output line: return '%s = %s' % (option_name, option_value) def cli_cmd_update_config_py(conf): """ Update new config.py from conf options, keeping 
previous config.py in a backup copy. """ print ">>> Going to update config.py..." ## location where config.py is: configpyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'config.py' ## backup current config.py file: if os.path.exists(configpyfile): shutil.copy(configpyfile, configpyfile + '.OLD') ## here we go: fdesc = open(configpyfile, 'w') ## generate preamble: fdesc.write("# -*- coding: utf-8 -*-\n") fdesc.write("# DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED\n") fdesc.write("# FROM INVENIO.CONF BY EXECUTING:\n") fdesc.write("# " + " ".join(sys.argv) + "\n") ## special treatment for CFG_SITE_NAME_INTL options: fdesc.write("CFG_SITE_NAME_INTL = {}\n") for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): fdesc.write("CFG_SITE_NAME_INTL['%s'] = \"%s\"\n" % (lang, conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang))) ## process all the options normally: for section in conf.sections(): for option in conf.options(section): if not option.startswith('CFG_DATABASE_'): # put all options except for db credentials into config.py line_out = convert_conf_option(option, conf.get(section, option)) if line_out: fdesc.write(line_out + "\n") ## generate postamble: fdesc.write("") fdesc.write("# END OF GENERATED FILE") ## we are done: fdesc.close() print "You may want to restart Apache now." print ">>> config.py updated successfully." def cli_cmd_update_dbquery_py(conf): """ Update lib/dbquery.py file with DB parameters read from conf file. Note: this edits dbquery.py in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbquery.py..." 
## location where dbquery.py is: dbquerypyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'dbquery.py' ## backup current dbquery.py file: if os.path.exists(dbquerypyfile): shutil.copy(dbquerypyfile, dbquerypyfile + '.OLD') ## replace db parameters: out = '' for line in open(dbquerypyfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get('Invenio', dbparam)) else: out += line fdesc = open(dbquerypyfile, 'w') fdesc.write(out) fdesc.close() print "You may want to restart Apache now." print ">>> dbquery.py updated successfully." def cli_cmd_update_dbexec(conf): """ Update bin/dbexec file with DB parameters read from conf file. Note: this edits dbexec in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbexec..." ## location where dbexec is: dbexecfile = conf.get("Invenio", "CFG_BINDIR") + \ os.sep + 'dbexec' ## backup current dbexec file: if os.path.exists(dbexecfile): shutil.copy(dbexecfile, dbexecfile + '.OLD') ## replace db parameters via sed: out = '' for line in open(dbexecfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|NAME|USER|PASS)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get("Invenio", dbparam)) else: out += line fdesc = open(dbexecfile, 'w') fdesc.write(out) fdesc.close() print ">>> dbexec updated successfully." def cli_cmd_update_bibconvert_tpl(conf): """ Update bibconvert/config/*.tpl files looking for 856 http://.../record/ lines, replacing URL with CDSWEB taken from conf file. Note: this edits tpl files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update bibconvert templates..." 
## location where bibconvert/config/*.tpl are: tpldir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'bibconvert' + os.sep + 'config' ## find all *.tpl files: for tplfilename in os.listdir(tpldir): if tplfilename.endswith(".tpl"): ## change tpl file: tplfile = tpldir + os.sep + tplfilename shutil.copy(tplfile, tplfile + '.OLD') out = '' for line in open(tplfile, 'r').readlines(): match = re.search(r'^(.*)http://.*?/record/(.*)$', line) if match: out += "%s%s/record/%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), match.group(2)) else: out += line fdesc = open(tplfile, 'w') fdesc.write(out) fdesc.close() print ">>> bibconvert templates updated successfully." def cli_cmd_reset_sitename(conf): """ Reset collection-related tables with new CFG_SITE_NAME and CFG_SITE_NAME_INTL* read from conf files. """ print ">>> Going to reset CFG_SITE_NAME and CFG_SITE_NAME_INTL..." from invenio.dbquery import run_sql, IntegrityError # reset CFG_SITE_NAME: sitename = conf.get("Invenio", "CFG_SITE_NAME") try: run_sql("""INSERT INTO collection (id, name, dbquery, reclist, restricted) VALUES (1,%s,NULL,NULL,NULL)""", (sitename,)) except IntegrityError: run_sql("""UPDATE collection SET name=%s WHERE id=1""", (sitename,)) # reset CFG_SITE_NAME_INTL: for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): sitename_lang = conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang) try: run_sql("""INSERT INTO collectionname (id_collection, ln, type, value) VALUES (%s,%s,%s,%s)""", (1, lang, 'ln', sitename_lang)) except IntegrityError: run_sql("""UPDATE collectionname SET value=%s WHERE ln=%s AND id_collection=1 AND type='ln'""", (sitename_lang, lang)) print "You may want to restart Apache now." print ">>> CFG_SITE_NAME and CFG_SITE_NAME_INTL* reset successfully." def cli_cmd_reset_siteadminemail(conf): """ Reset user-related tables with new CFG_SITE_ADMIN_EMAIL read from conf files. """ print ">>> Going to reset CFG_SITE_ADMIN_EMAIL..." 
def cli_cmd_reset_fieldnames(conf):
    """
    Reset I18N field names such as author, title, etc and other I18N
    ranking method names such as word similarity.  Their translations
    are taken from the PO files.

    For every language returned by language_list_long(), the
    translated name of each known field and each known rank method is
    inserted into fieldname / rnkMETHODNAME, or updated when the row
    already exists.  Fields and rank methods that have no entry in the
    translation tables below are left untouched.
    """
    print(">>> Going to reset I18N field names...")
    from invenio.messages import gettext_set_language, language_list_long
    from invenio.dbquery import run_sql, IntegrityError

    ## get field id and name list:
    field_id_name_list = run_sql("SELECT id, name FROM field")
    ## get rankmethod id and name list:
    rankmethod_id_name_list = run_sql("SELECT id, name FROM rnkMETHOD")
    ## update names for every language:
    for lang, dummy in language_list_long():
        _ = gettext_set_language(lang)
        ## this list is put here in order for PO system to pick names
        ## suitable for translation
        field_name_names = {"any field": _("any field"),
                            "title": _("title"),
                            "author": _("author"),
                            "abstract": _("abstract"),
                            "keyword": _("keyword"),
                            "report number": _("report number"),
                            "subject": _("subject"),
                            "reference": _("reference"),
                            "fulltext": _("fulltext"),
                            "collection": _("collection"),
                            "division": _("division"),
                            "year": _("year"),
                            "experiment": _("experiment"),
                            "record ID": _("record ID")}
        ## update I18N names for every language:
        for (field_id, field_name) in field_id_name_list:
            if field_name in field_name_names:
                try:
                    run_sql("""INSERT INTO fieldname (id_field,ln,type,value) VALUES (%s,%s,%s,%s)""",
                            (field_id, lang, 'ln',
                             field_name_names[field_name]))
                except IntegrityError:
                    run_sql("""UPDATE fieldname SET value=%s WHERE id_field=%s AND ln=%s AND type=%s""",
                            (field_name_names[field_name], field_id,
                             lang, 'ln',))
        ## ditto for rank methods:
        rankmethod_name_names = {"wrd": _("word similarity"),
                                 "demo_jif": _("journal impact factor"),
                                 "citation": _("times cited"),}
        for (rankmethod_id, rankmethod_name) in rankmethod_id_name_list:
            # skip rank methods without a translatable name (e.g. locally
            # defined ones) instead of failing with KeyError, mirroring
            # the guard used for fields above
            if rankmethod_name in rankmethod_name_names:
                try:
                    run_sql("""INSERT INTO rnkMETHODNAME (id_rnkMETHOD,ln,type,value) VALUES (%s,%s,%s,%s)""",
                            (rankmethod_id, lang, 'ln',
                             rankmethod_name_names[rankmethod_name]))
                except IntegrityError:
                    run_sql("""UPDATE rnkMETHODNAME SET value=%s WHERE id_rnkMETHOD=%s AND ln=%s AND type=%s""",
                            (rankmethod_name_names[rankmethod_name],
                             rankmethod_id, lang, 'ln',))
    print(">>> I18N field names reset successfully.")
If the problem is of different nature, then please inspect the above error message and fix the problem before continuing.""" % \ {'errno': err.args[0], 'errmsg': err.args[1], 'dbname': CFG_DATABASE_NAME, 'dbhost': CFG_DATABASE_HOST, 'dbuser': CFG_DATABASE_USER, 'dbpass': CFG_DATABASE_PASS, 'webhost': CFG_DATABASE_HOST == 'localhost' and 'localhost' or os.popen('hostname -f', 'r').read().strip(), }) sys.exit(1) print "ok" ## second, test insert/select of a Unicode string to detect ## possible Python/MySQL/MySQLdb mis-setup: print "Testing Python/MySQL/MySQLdb UTF-8 chain...", try: beta_in_utf8 = "β" # Greek beta in UTF-8 is 0xCEB2 run_sql("CREATE TEMPORARY TABLE test__invenio__utf8 (x char(1), y varbinary(2)) DEFAULT CHARACTER SET utf8") run_sql("INSERT INTO test__invenio__utf8 (x, y) VALUES (%s, %s)", (beta_in_utf8, beta_in_utf8)) res = run_sql("SELECT x,y,HEX(x),HEX(y),LENGTH(x),LENGTH(y),CHAR_LENGTH(x),CHAR_LENGTH(y) FROM test__invenio__utf8") assert res[0] == ('\xce\xb2', '\xce\xb2', 'CEB2', 'CEB2', 2L, 2L, 1L, 2L) run_sql("DROP TEMPORARY TABLE test__invenio__utf8") except Exception, err: print wrap_text_in_a_box("""\ DATABASE RELATED ERROR %s\n A problem was detected with the UTF-8 treatment in the chain between the Python application, the MySQLdb connector, and the MySQL database. You may perhaps have installed older versions of some prerequisite packages?\n Please check the INSTALL file and please fix this problem before continuing.""" % err) sys.exit(1) print "ok" def cli_cmd_create_tables(conf): """Create and fill Invenio DB tables. Useful for the installation process.""" print ">>> Going to create and fill tables..." 
def cli_cmd_drop_tables(conf):
    """
    Drop Invenio DB tables.  Useful for the uninstallation process.

    Unless '--yes-i-know' is present on the command line, the user is
    asked for confirmation first.  The process exits with status 1 if
    the dbexec command reports a failure.
    """
    print(">>> Going to drop tables...")
    from invenio.config import CFG_PREFIX
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    confirmed = '--yes-i-know' in sys.argv
    if not confirmed:
        warning = wrap_text_in_a_box("""\
WARNING: You are going to destroy your database tables!\n
Press Ctrl-C if you want to abort this action.\n
Press ENTER to proceed with this action.""")
        wait_for_user(warning)
    cmd = "%s/bin/dbexec < %s/lib/sql/invenio/tabdrop.sql" % (CFG_PREFIX, CFG_PREFIX)
    status = os.system(cmd)
    if status != 0:
        print("ERROR: failed execution of " + cmd)
        sys.exit(1)
    print(">>> Tables dropped successfully.")

def cli_cmd_create_demo_site(conf):
    """
    Create demo site.  Useful for testing purposes.

    Empties schTASK and then runs, in order, the demo configuration
    SQL, webaccessadmin and webcoll.  The process exits with status 1
    at the first command that reports a failure.
    """
    print(">>> Going to create demo site...")
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    run_sql("TRUNCATE schTASK")
    commands = [
        "%s/bin/dbexec < %s/lib/sql/invenio/democfgdata.sql" % (CFG_PREFIX, CFG_PREFIX),
        "%s/bin/webaccessadmin -u admin -c -r -D" % CFG_PREFIX,
        "%s/bin/webcoll -u admin" % CFG_PREFIX,
        "%s/bin/webcoll 1" % CFG_PREFIX,
    ]
    for cmd in commands:
        status = os.system(cmd)
        if status != 0:
            print("ERROR: failed execution of " + cmd)
            sys.exit(1)
    print(">>> Demo site created successfully.")
def cli_cmd_remove_demo_records(conf):
    """
    Remove demo records.  Useful when you are finished testing.

    Unless '--yes-i-know' is present on the command line, the user is
    asked for confirmation first.  The var/data/files directory below
    CFG_PREFIX is then deleted if it exists, schTASK is emptied, the
    bibliographic tables are cleaned and webcoll is run.  The process
    exits with status 1 at the first command that reports a failure.
    """
    print(">>> Going to remove demo records...")
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    confirmed = '--yes-i-know' in sys.argv
    if not confirmed:
        warning = wrap_text_in_a_box("""\
WARNING: You are going to destroy your records and documents!\n
Press Ctrl-C if you want to abort this action.\n
Press ENTER to proceed with this action.""")
        wait_for_user(warning)
    # same path as CFG_PREFIX + os.sep + 'var' + os.sep + 'data' + ...
    files_dir = os.sep.join([CFG_PREFIX, 'var', 'data', 'files'])
    if os.path.exists(files_dir):
        shutil.rmtree(files_dir)
    run_sql("TRUNCATE schTASK")
    commands = [
        "%s/bin/dbexec < %s/lib/sql/invenio/tabbibclean.sql" % (CFG_PREFIX, CFG_PREFIX),
        "%s/bin/webcoll -u admin" % CFG_PREFIX,
        "%s/bin/webcoll 1" % CFG_PREFIX,
    ]
    for cmd in commands:
        status = os.system(cmd)
        if status != 0:
            print("ERROR: failed execution of " + cmd)
            sys.exit(1)
    print(">>> Demo records removed successfully.")
def cli_cmd_run_unit_tests(conf):
    """
    Run unit tests, usually on the working demo site.

    Launches bin/testsuite below CFG_PREFIX; its exit status is
    ignored.
    """
    from invenio.config import CFG_PREFIX
    testsuite_cmd = "%s/bin/testsuite" % CFG_PREFIX
    os.system(testsuite_cmd)

def cli_cmd_run_regression_tests(conf):
    """
    Run regression tests, usually on the working demo site.

    Launches bin/regressiontestsuite below CFG_PREFIX, forwarding
    '--yes-i-know' when it was given on the command line; the exit
    status is ignored.
    """
    from invenio.config import CFG_PREFIX
    regression_cmd = "%s/bin/regressiontestsuite" % CFG_PREFIX
    if '--yes-i-know' in sys.argv:
        regression_cmd = "%s/bin/regressiontestsuite --yes-i-know" % CFG_PREFIX
    os.system(regression_cmd)
from invenio.textutils import wrap_text_in_a_box apache_conf_dir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'apache' if not os.path.exists(apache_conf_dir): os.mkdir(apache_conf_dir) apache_vhost_file = apache_conf_dir + os.sep + \ 'invenio-apache-vhost.conf' apache_vhost_ssl_file = apache_conf_dir + os.sep + \ 'invenio-apache-vhost-ssl.conf' apache_vhost_body = """\ AddDefaultCharset UTF-8 ServerSignature Off ServerTokens Prod NameVirtualHost *:80 Listen 80 deny from all deny from all ServerName %(servername)s ServerAlias %(serveralias)s ServerAdmin %(serveradmin)s DocumentRoot %(webdir)s Options FollowSymLinks MultiViews AllowOverride None Order allow,deny allow from all ErrorLog %(logdir)s/apache.err LogLevel warn CustomLog %(logdir)s/apache.log combined DirectoryIndex index.en.html index.html SetHandler python-program PythonHandler invenio.webinterface_layout PythonDebug On AddHandler python-program .py PythonHandler mod_python.publisher PythonDebug On """ % {'servername': conf.get('Invenio', 'CFG_SITE_URL').replace("http://", ""), 'serveralias': conf.get('Invenio', 'CFG_SITE_URL').replace("http://", "").split('.')[0], 'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'), 'webdir': conf.get('Invenio', 'CFG_WEBDIR'), 'logdir': conf.get('Invenio', 'CFG_LOGDIR'), } apache_vhost_ssl_body = """\ ServerSignature Off ServerTokens Prod Listen 443 NameVirtualHost *:443 #SSLCertificateFile /etc/apache2/ssl/apache.pem SSLCertificateFile /etc/apache2/ssl/server.crt SSLCertificateKeyFile /etc/apache2/ssl/server.key deny from all deny from all ServerName %(servername)s ServerAlias %(serveralias)s ServerAdmin %(serveradmin)s SSLEngine on DocumentRoot %(webdir)s Options FollowSymLinks MultiViews AllowOverride None Order allow,deny allow from all ErrorLog %(logdir)s/apache-ssl.err LogLevel warn CustomLog %(logdir)s/apache-ssl.log combined DirectoryIndex index.en.html index.html SetHandler python-program PythonHandler invenio.webinterface_layout PythonDebug On 
def cli_cmd_get(conf, varname):
    """
    Return value of VARNAME read from CONF files.  Useful for
    third-party programs to access values of conf options such as
    CFG_PREFIX.  Return None if VARNAME is not found.

    The lookup ignores the case of VARNAME and ignores section names:
    all sections are scanned, and when several sections define the
    option, the value from the last one wins.
    """
    # do not pay attention to upper/lower case:
    wanted = varname.lower()
    # do not pay attention to section names yet; keep scanning so that
    # a later section overrides an earlier one:
    found = None
    for section in conf.sections():
        for option, value in conf.items(section):
            if option == wanted:
                found = value
    return found
def _longest_strings_match(locate_command, strings_command, best):
    """
    Run LOCATE_COMMAND, then for every existing path it printed run
    STRINGS_COMMAND (a '%s' template filled with the path) and return
    the longest output line seen, starting the comparison from BEST.
    """
    dummy_status, located, dummy_err = run_command(locate_command)
    for candidate in located.split("\n"):
        if not os.path.exists(candidate):
            continue
        dummy_status2, found, dummy_err2 = run_command(strings_command % candidate)
        if not found:
            continue
        for found_line in found.split("\n"):
            # the longest the better
            if len(found_line) > len(best):
                best = found_line
    return best

def detect_apache_version():
    """
    Try to detect Apache httpd version by localizing httpd or apache2
    executables and digging into binary.  Return Apache version as a
    string.  Return empty string if not succeed.
    """
    return _longest_strings_match("locate bin/httpd bin/apache2",
                                  "strings %s | grep ^Apache\/",
                                  "")

def detect_modpython_version():
    """
    Try to detect mod_python version.  Return mod_python version as a
    string.  Return empty string if no success.

    The version exported by the mod_python package is preferred; the
    mod_python.so binary is only inspected when the package cannot be
    imported.
    """
    try:
        from mod_python import version
    except ImportError:
        # try to detect via looking at mod_python.so:
        version = _longest_strings_match("locate /mod_python.so",
                                         "strings %s | grep ^mod_python\/",
                                         "")
    return version
def main():
    """
    Main entry point.

    Handles --help/-h and --version/-V first.  Otherwise locates the
    conf directory (the --conf-dir value, or the 'etc' sibling of this
    script's 'bin' directory), reads invenio.conf,
    invenio-autotools.conf and the optional invenio-local.conf, and
    then executes every command option found in sys.argv, in order.
    Exits with status 1 on a bad --conf-dir or --get value, a missing
    mandatory conf file, an unknown option, an unknown --get variable,
    or when no command was given.
    """
    conf = ConfigParser()
    if '--help' in sys.argv or \
       '-h' in sys.argv:
        print_usage()
    elif '--version' in sys.argv or \
         '-V' in sys.argv:
        print_version()
    else:
        confdir = None
        if '--conf-dir' in sys.argv:
            try:
                confdir = sys.argv[sys.argv.index('--conf-dir') + 1]
            except IndexError:
                pass # missing --conf-dir argument value
            # confdir is still None when the value is missing; test it
            # explicitly because os.path.exists(None) raises TypeError
            if confdir is None or not os.path.exists(confdir):
                print("ERROR: bad or missing --conf-dir option value.")
                sys.exit(1)
        else:
            ## try to detect path to conf dir (relative to this bin dir):
            confdir = re.sub(r'/bin$', '/etc', sys.path[0])
        ## read conf files:
        for conffile in [confdir + os.sep + 'invenio.conf',
                         confdir + os.sep + 'invenio-autotools.conf',
                         confdir + os.sep + 'invenio-local.conf',]:
            if os.path.exists(conffile):
                conf.read(conffile)
            else:
                if not conffile.endswith("invenio-local.conf"):
                    # invenio-local.conf is optional, otherwise stop
                    print("ERROR: Badly guessed conf file location %s" % conffile)
                    print("(Please use --conf-dir option.)")
                    sys.exit(1)
        ## map every simple command option to the functions it runs:
        commands = {
            '--list': [cli_cmd_list],
            '--detect-system-details': [cli_cmd_detect_system_details],
            '--create-tables': [cli_cmd_create_tables],
            '--drop-tables': [cli_cmd_drop_tables],
            '--create-demo-site': [cli_cmd_create_demo_site],
            '--load-demo-records': [cli_cmd_load_demo_records],
            '--remove-demo-records': [cli_cmd_remove_demo_records],
            '--drop-demo-site': [cli_cmd_drop_demo_site],
            '--run-unit-tests': [cli_cmd_run_unit_tests],
            '--run-regression-tests': [cli_cmd_run_regression_tests],
            '--update-all': [cli_cmd_update_config_py,
                             cli_cmd_update_dbquery_py,
                             cli_cmd_update_dbexec,
                             cli_cmd_update_bibconvert_tpl],
            '--update-config-py': [cli_cmd_update_config_py],
            '--update-dbquery-py': [cli_cmd_update_dbquery_py],
            '--update-dbexec': [cli_cmd_update_dbexec],
            '--update-bibconvert-tpl': [cli_cmd_update_bibconvert_tpl],
            '--reset-all': [cli_cmd_reset_sitename,
                            cli_cmd_reset_siteadminemail,
                            cli_cmd_reset_fieldnames],
            '--reset-sitename': [cli_cmd_reset_sitename],
            '--reset-siteadminemail': [cli_cmd_reset_siteadminemail],
            '--reset-fieldnames': [cli_cmd_reset_fieldnames],
            '--create-apache-conf': [cli_cmd_create_apache_conf],
        }
        ## decide what to do:
        done = False
        for opt_idx in range(0, len(sys.argv)):
            opt = sys.argv[opt_idx]
            if opt == '--conf-dir':
                # already treated before, so skip silently:
                pass
            elif opt == '--get':
                try:
                    varname = sys.argv[opt_idx + 1]
                except IndexError:
                    print("ERROR: bad or missing --get option value.")
                    sys.exit(1)
                if varname.startswith('-'):
                    print("ERROR: bad or missing --get option value.")
                    sys.exit(1)
                varvalue = cli_cmd_get(conf, varname)
                if varvalue is not None:
                    print(varvalue)
                else:
                    sys.exit(1)
                done = True
            elif opt in commands:
                for command in commands[opt]:
                    command(conf)
                done = True
            elif opt.startswith("-") and opt != '--yes-i-know':
                print("ERROR: unknown option %s" % opt)
                sys.exit(1)
        if not done:
            print("""ERROR: Please specify a command. Please see '--help'.""")
            sys.exit(1)

if __name__ == '__main__':
    main()
## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __revision__ = "$Id$" import os import re import shutil import md5 import filecmp import time from datetime import datetime from xml.sax.saxutils import quoteattr from mimetypes import MimeTypes +try: + set() +except NameError: + from sets import Set as set + from invenio.dbquery import run_sql, DatabaseError from invenio.errorlib import register_exception from invenio.access_control_engine import acc_authorize_action -from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, CFG_SITE_URL, CFG_WEBDIR, CFG_WEBSUBMIT_FILEDIR, CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL +from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, CFG_SITE_URL,\ + CFG_WEBDIR, CFG_WEBSUBMIT_FILEDIR,\ + CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \ + CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL import invenio.template websubmit_templates = invenio.template.load('websubmit') websearch_templates = invenio.template.load('websearch') CFG_BIBDOCFILE_MD5_THRESHOLD = 256 * 1024 CFG_BIBDOCFILE_MD5_BUFFER = 1024 * 1024 CFG_BIBDOCFILE_MD5SUM_EXISTS = os.system('which md5sum 2>&1 > /dev/null') == 0 _mimes = MimeTypes() _mimes.suffix_map.update({'.tbz2' : '.tar.bz2'}) _mimes.encodings_map.update({'.bz2' : 'bzip2'}) _extensions = _mimes.encodings_map.keys() + \ _mimes.suffix_map.keys() + \ - _mimes.types_map[1].keys() + _mimes.types_map[1].keys() + \ + CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS _extensions.sort() _extensions.reverse() -_extensions = [ext.lower() for ext in _extensions] +_extensions = set([ext.lower() for ext in _extensions]) class InvenioWebSubmitFileError(Exception): pass def file_strip_ext(file): """Strip in the best way the extension from a filename""" lowfile = file.lower() ext = '.' 
def normalize_format(format):
    """
    Normalize the format: make sure a non-empty format starts with a
    dot.  An empty format is returned unchanged.
    """
    #format = format.lower()
    if format and format[0] != '.':
        format = '.' + format
    #format = format.replace('.jpg', '.jpeg')
    return format

_docname_re = re.compile(r'[^-\w.]*')

def normalize_docname(docname):
    """
    Normalize the docname.

    The filtering through _docname_re is disabled (see the commented
    out line), so the docname is currently returned unchanged.
    """
    #return _docname_re.sub('', docname)
    return docname

def normalize_version(version):
    """
    Normalize the version.

    Return VERSION as a string when it can be converted to an integer,
    'all' when it is the word 'all' (any case, surrounding blanks
    ignored), and the empty string for anything else, including None.
    """
    try:
        int(version)
    except (ValueError, TypeError):
        # TypeError covers None and other non-numeric objects, which
        # also have no lower() method
        try:
            keyword = version.lower().strip()
        except AttributeError:
            return ''
        if keyword == 'all':
            return 'all'
        return ''
    return str(version)

_path_re = re.compile(r'.*[\\/:]')

def decompose_file(file):
    """
    Decompose a file into dirname, basename and extension.

    Return the tuple (dirname, base, extension): dirname is everything
    before the last '\\', '/' or ':' separator, base is the file name
    with its known extensions removed by file_strip_ext(), and
    extension is what was removed, without its leading dot.
    """
    basename = _path_re.sub('', file)
    # drop the basename and the separator in front of it
    dirname = file[:-len(basename)-1]
    base = file_strip_ext(basename)
    # skip the dot between base and extension
    extension = basename[len(base) + 1:]
    return (dirname, base, extension)
+ extension goodname = "%s%s%s" % (basename, extension, version) i = 1 listdir = os.listdir(basedir) while goodname in listdir: i += 1 goodname = "%s_%s%s%s" % (basename, i, extension, version) return "%s/%s" % (basedir, goodname) class BibRecDocs: """this class represents all the files attached to one record""" def __init__(self, recid): self.id = recid self.bibdocs = [] self.build_bibdoc_list() def __repr__(self): return 'BibRecDocs(%s)' % self.id def __str__(self): out = '%i::::total bibdocs attached=%i\n' % (self.id, len(self.bibdocs)) out += '%i::::total size latest version=%s\n' % (self.id, nice_size(self.get_total_size_latest_version())) out += '%i::::total size all files=%s\n' % (self.id, nice_size(self.get_total_size())) for bibdoc in self.bibdocs: out += str(bibdoc) return out def get_total_size_latest_version(self): """Return the total size used on disk of all the files belonging to this record and corresponding to the latest version.""" size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size_latest_version() return size def get_total_size(self): """Return the total size used on disk of all the files belonging to this record of any version.""" size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size() return size def build_bibdoc_list(self): """This function must be called everytime a bibdoc connected to this recid is added, removed or modified. """ self.bibdocs = [] res = run_sql("""SELECT id_bibdoc, type, status FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s AND status<>'DELETED' ORDER BY docname ASC""", (self.id,)) for row in res: cur_doc = BibDoc(docid=row[0], recid=self.id, doctype=row[1]) self.bibdocs.append(cur_doc) def list_bibdocs(self, doctype=''): """Returns the list all bibdocs object belonging to a recid. If doctype is set, it returns just the bibdocs of that doctype. 
def get_bibdoc_names(self, doctype=''):
    """Return the list of docnames of the bibdocs of this record,
    restricted to DOCTYPE when it is given."""
    names = []
    for bibdoc in self.list_bibdocs(doctype):
        names.append(bibdoc.docname)
    return names

def check_file_exists(self, path):
    """Return True if one of the latest files of this record is
    identical to the file stored in PATH, False otherwise.

    Candidates are narrowed down by size, then by checksum, and only
    then compared with the file content.
    """
    size = os.path.getsize(path)
    # Keep only the latest files having the same size
    same_size = [docfile for docfile in self.list_latest_files()
                 if docfile.get_size() == size]
    if not same_size:
        return False
    # Among those, keep only the files having the same checksum
    checksum = calculate_md5(path)
    same_checksum = [docfile for docfile in same_size
                     if docfile.get_checksum() == checksum]
    if not same_checksum:
        return False
    # Same size and same checksum may still mean different content,
    # hence the final comparison of the files themselves
    identical = [docfile for docfile in same_checksum
                 if filecmp.cmp(docfile.get_full_path(), path)]
    return bool(identical)

def propose_unique_docname(self, docname):
    """Return a docname based on DOCNAME that no bibdoc of this record
    uses yet, appending _2, _3, ... as needed."""
    base = normalize_docname(docname)
    candidate = base
    suffix = 1
    while candidate in self.get_bibdoc_names():
        suffix += 1
        candidate = "%s_%s" % (base, suffix)
    return candidate

def get_docid(self, docname):
    """Return the docid corresponding to the given docname.  Raise
    InvenioWebSubmitFileError if no bibdoc of this record has that
    docname.
    """
    matching = [bibdoc for bibdoc in self.bibdocs
                if bibdoc.docname == docname]
    if matching:
        return matching[0].id
    raise InvenioWebSubmitFileError("Recid '%s' is not connected with a " \
        "docname '%s'" % (self.id, docname))
def get_bibdoc(self, docname):
    """Return the bibdoc of this record whose docname is DOCNAME.

    Only the bibdocs currently held in self.bibdocs are searched.
    Raise InvenioWebSubmitFileError when none of them matches.
    """
    for bibdoc in self.bibdocs:
        if bibdoc.docname == docname:
            return bibdoc
    raise InvenioWebSubmitFileError, "Recid '%s' is not connected with " \
        " docname '%s'" % (self.id, docname)

def delete_bibdoc(self, docname):
    """Delete the bibdoc(s) of this record whose docname is DOCNAME,
    then rebuild the list of bibdocs.  Nothing is raised when no
    bibdoc matches.
    """
    for bibdoc in self.bibdocs:
        if bibdoc.docname == docname:
            bibdoc.delete()
    # NOTE(review): this chunk lost its indentation; the rebuild is
    # assumed to run once, after the loop -- confirm against the
    # original file.
    self.build_bibdoc_list()

def add_bibdoc(self, doctype="Main", docname='file', never_fail=False):
    """Create a new bibdoc associated with the recid, with the given
    docname and doctype, and return the bibdoc object just created.

    If never_fail is True, the docname is first replaced by a unique
    one proposed by propose_unique_docname(), so that creation does
    not fail because of a name clash.  Otherwise
    InvenioWebSubmitFileError is raised when the record already has a
    bibdoc with that docname.
    """
    docname = normalize_docname(docname)
    if never_fail:
        docname = self.propose_unique_docname(docname)
    if docname in self.get_bibdoc_names():
        raise InvenioWebSubmitFileError, "%s has already a bibdoc with docname %s" % (self.id, docname)
    else:
        bibdoc = BibDoc(recid=self.id, doctype=doctype, docname=docname)
        # refresh self.bibdocs so that it includes the new bibdoc
        self.build_bibdoc_list()
        return bibdoc
""" if not docname: docname = decompose_file(fullpath)[1] docname = normalize_docname(docname) try: bibdoc = self.get_bibdoc(docname) except InvenioWebSubmitFileError: # bibdoc doesn't already exists! bibdoc = self.add_bibdoc(doctype, docname, False) bibdoc.add_file_new_version(fullpath) else: try: bibdoc.add_file_new_format(fullpath) except InvenioWebSubmitFileError, e: # Format already exist! if never_fail: bibdoc = self.add_bibdoc(doctype, docname, True) bibdoc.add_file_new_version(fullpath) else: raise e return bibdoc def add_new_version(self, fullpath, docname): """Adds a new fullpath file to an already existent docid making the previous files associated with the same bibdocids obsolete. It returns the bibdoc object. """ bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_version(fullpath) return bibdoc def add_new_format(self, fullpath, docname): """Adds a new format for a fullpath file to an already existent docid along side already there files. It returns the bibdoc object. """ bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_format(fullpath) return bibdoc def list_latest_files(self, doctype=''): """Returns a list which is made up by all the latest docfile of every bibdoc (of a particular doctype). 
""" docfiles = [] for bibdoc in self.list_bibdocs(doctype): docfiles += bibdoc.list_latest_files() return docfiles def display(self, docname="", version="", doctype="", ln=CFG_SITE_LANG): """Returns a formatted panel with information and links about a given docid of a particular version (or any), of a particular doctype (or any) """ t = "" if docname: try: bibdocs = [self.get_bibdoc(docname)] except InvenioWebSubmitFileError: bibdocs = self.list_bibdocs(doctype) else: bibdocs = self.list_bibdocs(doctype) if bibdocs: types = list_types_from_array(bibdocs) fulltypes = [] for mytype in types: fulltype = { 'name' : mytype, 'content' : [], } for bibdoc in bibdocs: if mytype == bibdoc.get_type(): fulltype['content'].append(bibdoc.display(version, ln = ln)) fulltypes.append(fulltype) t = websubmit_templates.tmpl_bibrecdoc_filelist( ln=ln, types = fulltypes, ) return t def fix(self, docname): """Algorithm that transform an a broken/old bibdoc into a coherent one: i.e. the corresponding folder will have files named after the bibdoc name. Proper .recid, .type, .md5 files will be created/updated. In case of more than one file with the same format revision a new bibdoc will be created in order to put does files. Returns the list of newly created bibdocs if any. """ bibdoc = self.get_bibdoc(docname) versions = {} res = [] new_bibdocs = [] # List of files with the same version/format of # existing file which need new bibdoc. counter = 0 zero_version_bug = False if os.path.exists(bibdoc.basedir): for filename in os.listdir(bibdoc.basedir): if filename[0] != '.' and ';' in filename: name, version = filename.split(';') try: version = int(version) except ValueError: # Strange name, let's skip it... 
register_exception() continue if version == 0: zero_version_bug = True format = name[len(file_strip_ext(name)):] format = normalize_format(format) if not versions.has_key(version): versions[version] = {} new_name = 'FIXING-%s-%s' % (str(counter), name) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e) if versions[version].has_key(format): new_bibdocs.append((new_name, version)) else: versions[version][format] = new_name counter += 1 if not versions: bibdoc.delete() else: for version, formats in versions.iteritems(): if zero_version_bug: version += 1 for format, filename in formats.iteritems(): destination = '%s%s;%i' % (docname, format, version) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e) try: open("%s/.recid" % bibdoc.basedir, "w").write(str(self.id)) open("%s/.type" % bibdoc.basedir, "w").write(str(bibdoc.doctype)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e) self.build_bibdoc_list() res = [] for (filename, version) in new_bibdocs: if zero_version_bug: version += 1 new_bibdoc = self.add_bibdoc(doctype=bibdoc.doctype, docname=docname, never_fail=True) new_bibdoc.add_file_new_format('%s/%s' % (bibdoc.basedir, filename), version) res.append(new_bibdoc) try: os.remove('%s/%s' % (bibdoc.basedir, filename)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e) 
Md5Folder(bibdoc.basedir).update(only_new=False) bibdoc._build_file_list() self.build_bibdoc_list() return res class BibDoc: """this class represents one file attached to a record there is a one to one mapping between an instance of this class and an entry in the bibdoc db table""" def __init__ (self, docid="", recid="", docname="file", doctype="Main"): """Constructor of a bibdoc. At least the docid or the recid/docname pair is needed.""" # docid is known, the document already exists docname = normalize_docname(docname) self.docfiles = [] self.md5s = None self.related_files = [] if docid != "": if recid == "": recid = None self.doctype = "" res = run_sql("select id_bibrec,type from bibrec_bibdoc " "where id_bibdoc=%s", (docid,)) if len(res) > 0: recid = res[0][0] self.doctype = res[0][1] else: res = run_sql("select id_bibdoc1 from bibdoc_bibdoc " "where id_bibdoc2=%s", (docid,)) if len(res) > 0 : main_bibdoc = res[0][0] res = run_sql("select id_bibrec,type from bibrec_bibdoc " "where id_bibdoc=%s", (main_bibdoc,)) if len(res) > 0: recid = res[0][0] self.doctype = res[0][1] else: res = run_sql("select type from bibrec_bibdoc " "where id_bibrec=%s and id_bibdoc=%s", (recid, docid,)) if len(res) > 0: self.doctype = res[0][0] else: #this bibdoc isn't associated with the corresponding bibrec. raise InvenioWebSubmitFileError, "No docid associated with the recid %s" % recid # gather the other information res = run_sql("select id,status,docname,creation_date," "modification_date from bibdoc where id=%s", (docid,)) if len(res) > 0: self.cd = res[0][3] self.md = res[0][4] self.recid = recid self.docname = res[0][2] self.id = docid self.status = res[0][1] self.basedir = _make_base_dir(self.id) else: # this bibdoc doesn't exist raise InvenioWebSubmitFileError, "The docid %s does not exist." 
% docid # else it is a new document else: if docname == "" or doctype == "": raise InvenioWebSubmitFileError, "Argument missing for creating a new bibdoc" else: self.recid = recid self.doctype = doctype self.docname = docname self.status = '' if recid: res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (recid, docname)) if res: raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (docname, recid) self.id = run_sql("INSERT INTO bibdoc (status,docname,creation_date,modification_date) " "values(%s,%s,NOW(),NOW())", (self.status, docname,)) if self.id is not None: # we link the document to the record if a recid was # specified if self.recid != "": run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type) VALUES (%s,%s,%s)", (recid, self.id, self.doctype,)) else: raise InvenioWebSubmitFileError, "New docid cannot be created" self.basedir = _make_base_dir(self.id) # we create the corresponding storage directory if not os.path.exists(self.basedir): old_umask = os.umask(022) os.makedirs(self.basedir) # and save the father record id if it exists try: if self.recid != "": open("%s/.recid" % self.basedir, "w").write(str(self.recid)) if self.doctype != "": open("%s/.type" % self.basedir, "w").write(str(self.doctype)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, e os.umask(old_umask) # build list of attached files self._build_file_list('init') # link with related_files self._build_related_file_list() def __repr__(self): return 'BibDoc(%i, %i, %s, %s)' % (self.id, self.recid, repr(self.docname), repr(self.doctype)) def __str__(self): out = '%s:%i:::docname=%s\n' % (self.recid or '', self.id, self.docname) out += '%s:%i:::doctype=%s\n' % (self.recid or '', self.id, self.doctype) out += '%s:%i:::status=%s\n' % (self.recid or '', self.id, self.status) out += '%s:%i:::basedir=%s\n' % (self.recid or '', self.id, self.basedir) out += 
def touch(self):
    """Refresh the modification date of the bibdoc and of its record.

    The bibdoc row is always stamped with NOW(); the bibrec row is
    stamped too, but only when this bibdoc has a truthy recid.
    """
    run_sql('UPDATE bibdoc SET modification_date=NOW() WHERE id=%s', (self.id, ))
    if not self.recid:
        return
    run_sql('UPDATE bibrec SET modification_date=NOW() WHERE id=%s', (self.recid, ))
def revert(self, version):
    """Revert to a given version by copying its formats to a new version.

    Every file of `version` is copied to version (latest + 1), keeping its
    format, and made world-readable (mode 0644).

    Raises InvenioWebSubmitFileError when a destination file already
    exists or when a copy fails.  Whatever happens, the md5 bookkeeping,
    the modification dates and the cached file list are refreshed.
    """
    try:
        version = int(version)
        new_version = self.get_latest_version() + 1
        for docfile in self.list_version_files(version):
            source = docfile.get_full_path()
            destination = "%s/%s%s;%i" % (self.basedir, self.docname, docfile.get_format(), new_version)
            if os.path.exists(destination):
                raise InvenioWebSubmitFileError("A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, docfile.get_format()))
            try:
                shutil.copyfile(source, destination)
                os.chmod(destination, 0o644)
            except Exception as e:
                register_exception()
                # Report the real source path (the message used to
                # interpolate the builtin `file` type by mistake).
                raise InvenioWebSubmitFileError("Encountered an exception while copying '%s' to '%s': '%s'" % (source, destination, e))
    finally:
        Md5Folder(self.basedir).update()
        self.touch()
        self._build_file_list()
def get_icon(self):
    """Return the bibdoc acting as icon of this bibdoc, or None.

    The icon is the first entry stored under the 'Icon' key of
    self.related_files; None is returned when there is no such key.
    """
    # `in` rather than has_key(): it also works while related_files is
    # still a plain list (its initial value before the related-file map
    # is built), where has_key() would raise AttributeError.
    if 'Icon' in self.related_files:
        return self.related_files['Icon'][0]
    return None
""" t = "" if version == "all": docfiles = self.list_all_files() elif version != "": version = int(version) docfiles = self.list_version_files(version) else: docfiles = self.list_latest_files() existing_icon = self.get_icon() if existing_icon is not None: existing_icon = existing_icon.list_all_files()[0] imageurl = "%s/record/%s/files/%s" % \ (CFG_SITE_URL, self.recid, existing_icon.get_full_name()) else: imageurl = "%s/img/smallfiles.gif" % CFG_SITE_URL versions = [] for version in list_versions_from_array(docfiles): currversion = { 'version' : version, 'previous' : 0, 'content' : [] } if version == self.get_latest_version() and version != 1: currversion['previous'] = 1 for docfile in docfiles: if docfile.get_version() == version: currversion['content'].append(docfile.display(ln = ln)) versions.append(currversion) t = websubmit_templates.tmpl_bibdoc_filelist( ln = ln, versions = versions, imageurl = imageurl, docname = self.docname, recid = self.recid ) return t def change_name(self, newname): """Rename the bibdoc name. New name must not be already used by the linked bibrecs.""" newname = normalize_docname(newname) res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (self.recid, newname)) if res: raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid) run_sql("update bibdoc set docname=%s where id=%s", (newname, self.id,)) for f in os.listdir(self.basedir): if f.startswith(self.docname): shutil.move('%s/%s' % (self.basedir, f), '%s/%s' % (self.basedir, f.replace(self.docname, newname, 1))) self.docname = newname Md5Folder(self.basedir).update() self.touch() self._build_file_list('rename') self._build_related_file_list() def get_docname(self): """retrieve bibdoc name""" return self.docname def get_base_dir(self): """retrieve bibdoc base directory, e.g. 
def undelete(self, previous_status=''):
    """Undelete a deleted bibdoc (only if it was actually deleted).

    previous_status is the status (i.e. the restriction key) to restore;
    with the default empty string the bibdoc becomes public.  After the
    status has been reset, the 'DELETED-<date>-' prefix is stripped from
    the docname.

    Raises InvenioWebSubmitFileError when the database update fails, when
    the docname cannot be restored, or when the docname does not start
    with 'DELETED-'.
    """
    try:
        # Placeholder order is (status, id): the status to restore comes
        # first, the bibdoc id second.
        run_sql("UPDATE bibdoc SET status=%s WHERE id=%s AND status='DELETED'", (previous_status, self.id))
    except Exception as e:
        register_exception()
        raise InvenioWebSubmitFileError("It's impossible to undelete bibdoc %s: %s" % (self.id, e))
    if self.docname.startswith('DELETED-'):
        # Let's remove DELETED-20080214144322- in front of the docname
        original_name = '-'.join(self.docname.split('-')[2:])
        try:
            self.change_name(original_name)
        except Exception as e:
            register_exception()
            # Three values need three placeholders; the trailing one for
            # the cause was missing and made this line raise TypeError.
            raise InvenioWebSubmitFileError("It's impossible to restore the previous docname %s. %s kept as docname because: %s" % (original_name, self.docname, e))
    else:
        raise InvenioWebSubmitFileError("Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % self.docname)
""" def log_action(action, docid, docname, format, version, size, checksum, timestamp=''): """Log an action into the bibdoclog table.""" try: if timestamp: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)', (action, docid, docname, format, version, size, checksum, time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(timestamp)))) else: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, NOW())', (action, docid, docname, format, version, size, checksum)) except DatabaseError: register_exception() def make_removed_added_bibdocfiles(previous_file_list): """Internal function for build the log of changed files.""" # Let's rebuild the previous situation old_files = {} for bibdocfile in previous_file_list: old_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's rebuild the new situation new_files = {} for bibdocfile in self.docfiles: new_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's subtract from added file all the files that are present in # the old list, and let's add to deleted files that are not present # added file. 
added_files = dict(new_files) deleted_files = {} for key, value in old_files.iteritems(): if added_files.has_key(key): del added_files[key] else: deleted_files[key] = value return (added_files, deleted_files) if context != 'init': previous_file_list = list(self.docfiles) self.docfiles = [] if os.path.exists(self.basedir): self.md5s = Md5Folder(self.basedir) files = os.listdir(self.basedir) files.sort() for fil in files: if not fil.startswith('.'): try: filepath = "%s/%s" % (self.basedir, fil) fileversion = int(re.sub(".*;", "", fil)) fullname = fil.replace(";%s" % fileversion, "") checksum = self.md5s.get_checksum(fil) (dirname, basename, format) = decompose_file(fullname) # we can append file: self.docfiles.append(BibDocFile(filepath, self.doctype, fileversion, basename, format, self.recid, self.id, self.status, checksum)) except Exception, e: register_exception() if context == 'init': return else: added_files, deleted_files = make_removed_added_bibdocfiles(previous_file_list) deletedstr = "DELETED" addedstr = "ADDED" if context == 'rename': deletedstr = "RENAMEDFROM" addedstr = "RENAMEDTO" for (docname, format, version), (size, checksum, md) in added_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(addedstr, self.id, docname, format, version, size, checksum, md) for (docname, format, version), (size, checksum, md) in deleted_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(deletedstr, self.id, docname, format, version, size, checksum, md) def _build_related_file_list(self): """Lists all files attached to the bibdoc. This function should be called everytime the bibdoc is modified within e.g. its icon. 
""" self.related_files = {} res = run_sql("SELECT ln.id_bibdoc2,ln.type,bibdoc.status FROM " "bibdoc_bibdoc AS ln,bibdoc WHERE id=ln.id_bibdoc2 AND " "ln.id_bibdoc1=%s", (self.id,)) for row in res: docid = row[0] doctype = row[1] if row[2] != 'DELETED': if not self.related_files.has_key(doctype): self.related_files[doctype] = [] cur_doc = BibDoc(docid=docid) self.related_files[doctype].append(cur_doc) def get_total_size_latest_version(self): """Return the total size used on disk of all the files belonging to this bibdoc and corresponding to the latest version.""" ret = 0 for bibdocfile in self.list_latest_files(): ret += bibdocfile.get_size() return ret def get_total_size(self): """Return the total size used on disk of all the files belonging to this bibdoc.""" ret = 0 for bibdocfile in self.list_all_files(): ret += bibdocfile.get_size() return ret def list_all_files(self): """Returns all the docfiles linked with the given bibdoc.""" return self.docfiles def list_latest_files(self): """Returns all the docfiles within the last version.""" return self.list_version_files(self.get_latest_version()) def list_version_files(self, version): """Return all the docfiles of a particular version.""" version = int(version) return [docfile for docfile in self.docfiles if docfile.get_version() == version] def get_latest_version(self): """ Returns the latest existing version number for the given bibdoc. If no file is associated to this bibdoc, returns '0'. 
""" if len(self.docfiles) > 0: self.docfiles.sort(order_files_with_version) return self.docfiles[0].get_version() else: return 0 def get_file_number(self): """Return the total number of files.""" return len(self.docfiles) def register_download(self, ip_address, version, format, userid=0): """Register the information about a download of a particular file.""" format = normalize_format(format) return run_sql("INSERT INTO rnkDOWNLOADS " "(id_bibrec,id_bibdoc,file_version,file_format," "id_user,client_host,download_time) VALUES " "(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())", (self.recid, self.id, version, format, userid, ip_address,)) def readfile(filename): """Used only for backward compatibility.""" try: return open(filename).read() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "It's impossible to read %s: %s" % (filename, e) class BibDocFile: """This class represents a physical file in the CDS Invenio filesystem. It should never be instantiated directly""" def __init__(self, fullpath, doctype, version, name, format, recid, docid, status, checksum): self.fullpath = fullpath self.doctype = doctype self.docid = docid self.recid = recid self.version = version self.status = status self.checksum = checksum self.size = os.path.getsize(fullpath) self.md = os.path.getmtime(fullpath) try: self.cd = os.path.getctime(fullpath) except OSError: self.cd = self.md self.name = name self.format = normalize_format(format) self.dir = os.path.dirname(fullpath) if format == "": self.mime = "text/plain" self.encoding = "" self.fullname = name else: self.fullname = "%s%s" % (name, self.format) (self.mime, self.encoding) = _mimes.guess_type(self.fullname) if self.mime is None: self.mime = "text/plain" def __repr__(self): return ('BibDocFile(%s, %s, %i, %s, %s, %i, %i, %s, %s)' % (repr(self.fullpath), repr(self.doctype), self.version, repr(self.name), repr(self.format), self.recid, self.docid, repr(self.status), repr(self.checksum))) def __str__(self): out = 
'%s:%s:%s:%s:fullpath=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullpath) out += '%s:%s:%s:%s:fullname=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullname) out += '%s:%s:%s:%s:name=%s\n' % (self.recid, self.docid, self.version, self.format, self.name) out += '%s:%s:%s:%s:status=%s\n' % (self.recid, self.docid, self.version, self.format, self.status) out += '%s:%s:%s:%s:checksum=%s\n' % (self.recid, self.docid, self.version, self.format, self.checksum) out += '%s:%s:%s:%s:size=%s\n' % (self.recid, self.docid, self.version, self.format, nice_size(self.size)) out += '%s:%s:%s:%s:creation time=%s\n' % (self.recid, self.docid, self.version, self.format, self.cd) out += '%s:%s:%s:%s:modification time=%s\n' % (self.recid, self.docid, self.version, self.format, self.md) out += '%s:%s:%s:%s:encoding=%s\n' % (self.recid, self.docid, self.version, self.format, self.encoding) return out def display(self, ln = CFG_SITE_LANG): """Returns a formatted representation of this docfile.""" return websubmit_templates.tmpl_bibdocfile_filelist( ln = ln, recid = self.recid, version = self.version, name = self.name, format = self.format, size = self.size, ) def is_restricted(self, req): """Returns restriction state. 
def get_content(self):
    """Return the binary content of the file stored at self.fullpath."""
    content_file = open(self.fullpath, 'rb')
    try:
        return content_file.read()
    finally:
        # Close explicitly instead of relying on garbage collection.
        content_file.close()
def update(self, only_new = True):
    """Update the .md5 file with the checksums of the current files.

    only_new -- when True (default) only files without a known checksum
    are hashed; when False the checksum table is rebuilt from scratch, so
    every present file is re-hashed and entries for files that are no
    longer there are dropped.

    Files whose name starts with '.' (.md5, .recid, .type, ...) are
    bookkeeping files and are never checksummed.  Nothing is done when
    the folder does not exist.
    """
    if os.path.exists(self.folder):
        if not only_new:
            self.md5s = {}
        for filename in os.listdir(self.folder):
            if filename.startswith('.'):
                # Must be tested on its own: folded into the
                # `not only_new or ...` condition it was bypassed by
                # operator precedence on a full refresh.
                continue
            if self.md5s.get(filename, None) is None:
                self.md5s[filename] = calculate_md5("%s/%s" % (self.folder, filename))
        self.store()
def calculate_md5(filename, force_internal=False):
    """Return the hexadecimal md5 digest of a physical file.

    The digest is computed in-process, reading the file in chunks of
    CFG_BIBDOCFILE_MD5_BUFFER bytes, when md5sum is not available
    (CFG_BIBDOCFILE_MD5SUM_EXISTS is false), when force_internal is set,
    or when the file is smaller than CFG_BIBDOCFILE_MD5_THRESHOLD.
    Otherwise the work is delegated to calculate_md5_external().

    Raises InvenioWebSubmitFileError when the in-process computation
    fails.
    """
    if not CFG_BIBDOCFILE_MD5SUM_EXISTS or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD:
        try:
            to_be_read = open(filename, "rb")
            try:
                computed_md5 = md5.new()
                while True:
                    buf = to_be_read.read(CFG_BIBDOCFILE_MD5_BUFFER)
                    if not buf:
                        break
                    computed_md5.update(buf)
            finally:
                # Close explicitly instead of relying on garbage
                # collection; this runs once per hashed file.
                to_be_read.close()
            return computed_md5.hexdigest()
        except Exception as e:
            register_exception()
            raise InvenioWebSubmitFileError("Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e))
    else:
        return calculate_md5_external(filename)
def nice_size(size):
    """Return size (in bytes) as a human readable string.

    The value is divided by 1024 for as long as it is strictly greater
    than 1024, moving from B to KB, MB and at most GB; the number is then
    formatted by websearch_templates.tmpl_nice_number() with at most
    three digits after the dot and followed by the unit.
    """
    unit = 'B'
    for bigger_unit in ('KB', 'MB', 'GB'):
        if not size > 1024:
            break
        size /= 1024.0
        unit = bigger_unit
    return '%s %s' % (websearch_templates.tmpl_nice_number(size, max_ndigits_after_dot=3), unit)