diff --git a/config/invenio.conf b/config/invenio.conf
index ad6124f64..39a63222a 100644
--- a/config/invenio.conf
+++ b/config/invenio.conf
@@ -1,1972 +1,2051 @@
## This file is part of Invenio.
## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

###################################################
## About 'invenio.conf' and 'invenio-local.conf' ##
###################################################

## The 'invenio.conf' file contains the vanilla default configuration
## parameters of an Invenio installation, as coming out of the
## distribution.  The file should be self-explanatory.  Once installed
## in its usual location (usually /opt/invenio/etc), you could in
## principle go ahead and change the values according to your local
## needs, but this is not advised.
##
## If you would like to customize some of these parameters, you should
## rather create a file named 'invenio-local.conf' in the same
## directory where 'invenio.conf' lives and you should write there
## only the customizations that you want to be different from the
## vanilla defaults.
##
## Here is a realistic, minimalist, yet production-ready example of
## what you would typically put there:
##
##    $ cat /opt/invenio/etc/invenio-local.conf
##    [Invenio]
##    CFG_SITE_NAME = John Doe's Document Server
##    CFG_SITE_NAME_INTL_fr = Serveur des Documents de John Doe
##    CFG_SITE_URL = http://your.site.com
##    CFG_SITE_SECURE_URL = https://your.site.com
##    CFG_SITE_ADMIN_EMAIL = john.doe@your.site.com
##    CFG_SITE_SUPPORT_EMAIL = john.doe@your.site.com
##    CFG_WEBALERT_ALERT_ENGINE_EMAIL = john.doe@your.site.com
##    CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = john.doe@your.site.com
##    CFG_WEBCOMMENT_DEFAULT_MODERATOR = john.doe@your.site.com
##    CFG_DATABASE_HOST = localhost
##    CFG_DATABASE_NAME = invenio
##    CFG_DATABASE_USER = invenio
##    CFG_DATABASE_PASS = my123p$ss
##
## You should override at least the parameters mentioned above and the
## parameters mentioned in the `Part 1: Essential parameters' below in
## order to define some very essential runtime parameters such as the
## name of your document server (CFG_SITE_NAME and
## CFG_SITE_NAME_INTL_*), the visible URL of your document server
## (CFG_SITE_URL and CFG_SITE_SECURE_URL), the email address of the
## local Invenio administrator, comment moderator, and alert engine
## (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_ADMIN_EMAIL, etc), and last but
## not least your database credentials (CFG_DATABASE_*).
##
## The Invenio system will then read both the default invenio.conf
## file and your customized invenio-local.conf file and it will
## override any default options with the ones you have specified in
## your local file.  This cascading of configuration parameters will
## ease your future upgrades.
[Invenio]

###################################
## Part 1: Essential parameters  ##
###################################

## This part defines essential Invenio internal parameters that
## everybody should override, like the name of the server or the email
## address of the local Invenio administrator.

## CFG_DATABASE_* - specify which MySQL server to use, the name of the
## database to use, and the database access credentials.
CFG_DATABASE_HOST = localhost
CFG_DATABASE_PORT = 3306
CFG_DATABASE_NAME = invenio
CFG_DATABASE_USER = invenio
CFG_DATABASE_PASS = my123p$ss

## CFG_DATABASE_SLAVE - if you use DB replication, then specify the DB
## slave address here.  (The same access credentials are assumed for
## the DB slave as for the DB master.)  If you don't use DB
## replication, then leave this option blank.
CFG_DATABASE_SLAVE =

## CFG_SITE_URL - specify the URL under which your installation will
## be visible.  For example, use "http://your.site.com".  Do not
## include a trailing slash.
CFG_SITE_URL = http://localhost

## CFG_SITE_SECURE_URL - specify the secure URL under which your
## installation's secure pages such as login or registration will be
## visible.  For example, use "https://your.site.com".  Do not include
## a trailing slash.  If you don't plan on using HTTPS, then you may
## leave this empty.
CFG_SITE_SECURE_URL = https://localhost

## CFG_SITE_NAME -- the visible name of your Invenio installation.
CFG_SITE_NAME = Atlantis Institute of Fictive Science

## CFG_SITE_NAME_INTL -- the international versions of CFG_SITE_NAME
## in various languages.  (See also CFG_SITE_LANGS below.)
CFG_SITE_NAME_INTL_en = Atlantis Institute of Fictive Science
CFG_SITE_NAME_INTL_fr = Atlantis Institut des Sciences Fictives
CFG_SITE_NAME_INTL_de = Atlantis Institut der fiktiven Wissenschaft
CFG_SITE_NAME_INTL_es = Atlantis Instituto de la Ciencia Fictive
CFG_SITE_NAME_INTL_ca = Institut Atlantis de Ciència Fictícia
CFG_SITE_NAME_INTL_pt = Instituto Atlantis de Ciência Fictícia
CFG_SITE_NAME_INTL_it = Atlantis Istituto di Scienza Fittizia
CFG_SITE_NAME_INTL_ru = Институт Фиктивных Наук Атлантиды
CFG_SITE_NAME_INTL_sk = Atlantis Inštitút Fiktívnych Vied
CFG_SITE_NAME_INTL_cs = Atlantis Institut Fiktivních Věd
CFG_SITE_NAME_INTL_no = Atlantis Institutt for Fiktiv Vitenskap
CFG_SITE_NAME_INTL_sv = Atlantis Institut för Fiktiv Vetenskap
CFG_SITE_NAME_INTL_el = Ινστιτούτο Φανταστικών Επιστημών Ατλαντίδος
CFG_SITE_NAME_INTL_uk = Інститут вигаданих наук в Атлантісі
CFG_SITE_NAME_INTL_ja = Fictive 科学のAtlantis の協会
CFG_SITE_NAME_INTL_pl = Instytut Fikcyjnej Nauki Atlantis
CFG_SITE_NAME_INTL_bg = Институт за фиктивни науки Атлантис
CFG_SITE_NAME_INTL_hr = Institut Fiktivnih Znanosti Atlantis
CFG_SITE_NAME_INTL_zh_CN = 阿特兰提斯虚拟科学学院
CFG_SITE_NAME_INTL_zh_TW = 阿特蘭提斯虛擬科學學院
CFG_SITE_NAME_INTL_hu = Kitalált Tudományok Atlantiszi Intézete
CFG_SITE_NAME_INTL_af = Atlantis Instituut van Fiktiewe Wetenskap
CFG_SITE_NAME_INTL_gl = Instituto Atlantis de Ciencia Fictive
CFG_SITE_NAME_INTL_ro = Institutul Atlantis al Ştiinţelor Fictive
CFG_SITE_NAME_INTL_rw = Atlantis Ishuri Rikuru Ry'ubuhanga
CFG_SITE_NAME_INTL_ka = ატლანტიდის ფიქტიური მეცნიერების ინსტიტუტი
CFG_SITE_NAME_INTL_lt = Fiktyvių Mokslų Institutas Atlantis
CFG_SITE_NAME_INTL_ar = معهد أطلنطيس للعلوم الافتراضية

## CFG_SITE_LANG -- the default language of the interface:
CFG_SITE_LANG = en
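## For example, a bilingual site that wants French as its default
## interface language, with only English and French enabled (see
## CFG_SITE_LANGS just below), could put the following in
## invenio-local.conf (the values are only an illustration):
##
##    CFG_SITE_LANG = fr
##    CFG_SITE_LANGS = en,fr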
## CFG_SITE_LANGS -- list of all languages the user interface should
## be available in, separated by commas.  The order specified below
## will be respected on the interface pages.  A good default would be
## to use the alphabetical order.  Currently supported languages
## include Afrikaans, Arabic, Bulgarian, Catalan, Czech, German,
## Georgian, Greek, English, Spanish, French, Croatian, Hungarian,
## Galician, Italian, Japanese, Kinyarwanda, Lithuanian, Norwegian,
## Polish, Portuguese, Romanian, Russian, Slovak, Swedish, Ukrainian,
## Chinese (China), Chinese (Taiwan), so that the eventual maximum you
## can currently select is
## "af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW".
CFG_SITE_LANGS = af,ar,bg,ca,cs,de,el,en,es,fr,hr,gl,ka,it,rw,lt,hu,ja,no,pl,pt,ro,ru,sk,sv,uk,zh_CN,zh_TW

## CFG_SITE_SUPPORT_EMAIL -- the email address of the support team for
## this installation:
CFG_SITE_SUPPORT_EMAIL = info@invenio-software.org

## CFG_SITE_ADMIN_EMAIL -- the email address of the 'superuser' for
## this installation.  Enter your email address below and login with
## this address when using Invenio administration modules.  You
## will then be automatically recognized as superuser of the system.
CFG_SITE_ADMIN_EMAIL = info@invenio-software.org

## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES -- list of email addresses to
## which an email should be sent in case of emergency (e.g. bibsched
## queue has been stopped because of an error).  The configuration
## dictionary allows for different recipients based on weekday and
## time-of-day.  Example:
##
## CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {
##    'Sunday 22:00-06:00': '0041761111111@email2sms.foo.com',
##    '06:00-18:00': 'team-in-europe@foo.com,0041762222222@email2sms.foo.com',
##    '18:00-06:00': 'team-in-usa@foo.com',
##    '*': 'john.doe.phone@foo.com'}
##
## If you want the emergency email notifications to always go to the
## same address, just use the wildcard line in the above example.
CFG_SITE_EMERGENCY_EMAIL_ADDRESSES = {}

## CFG_SITE_ADMIN_EMAIL_EXCEPTIONS -- set this to 0 if you do not want
## to receive any captured exception via email to CFG_SITE_ADMIN_EMAIL
## address.  Captured exceptions will still be available in
## var/log/invenio.err file.  Set this to 1 if you want to receive
## some of the captured exceptions (this depends on the actual place
## where the exception is captured).  Set this to 2 if you want to
## receive all captured exceptions.
CFG_SITE_ADMIN_EMAIL_EXCEPTIONS = 1

## CFG_SITE_RECORD -- what is the URI part representing detailed
## record pages?  We recommend leaving the default value `record'
## unchanged.
CFG_SITE_RECORD = record

## CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER -- set this to
## the number of seconds after which to reset the exception notification
## counter.  A given repetitive exception is notified via email with a
## logarithmic strategy: the first time it is seen it is sent via email,
## then the second time, then the fourth, then the eighth and so forth.
## If the number of seconds elapsed since the last time it was notified
## is greater than CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER
## then the internal counter is reset in order not to have exception
## notifications become more and more rare.
CFG_ERRORLIB_RESET_EXCEPTION_NOTIFICATION_COUNTER_AFTER = 14400

## CFG_CERN_SITE -- do we want to enable CERN-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_CERN_SITE = 0

## CFG_INSPIRE_SITE -- do we want to enable INSPIRE-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_INSPIRE_SITE = 0

## CFG_ADS_SITE -- do we want to enable ADS-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_ADS_SITE = 0

## CFG_OPENAIRE_SITE -- do we want to enable OpenAIRE-specific code?
## Put "1" for "yes" and "0" for "no".
CFG_OPENAIRE_SITE = 0

## CFG_DEVEL_SITE -- is this a development site?  If it is, you might
## prefer that it does not do certain things.  For example, you might
## not want WebSubmit to send certain emails or trigger certain
## processes on a development site.
## Put "1" for "yes" (this is a development site) or "0" for "no"
## (this isn't a development site).
CFG_DEVEL_SITE = 0

################################
## Part 2: Web page style     ##
################################

## The variables affecting the page style.  The most important one is
## the 'template skin' you would like to use and the obfuscation mode
## for your email addresses.  Please refer to the WebStyle Admin Guide
## for more explanation.  The other variables are listed here mostly
## for backwards compatibility purposes only.

## CFG_WEBSTYLE_TEMPLATE_SKIN -- what template skin do you want to
## use?
CFG_WEBSTYLE_TEMPLATE_SKIN = default

## CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE -- how do we "protect"
## email addresses from undesired automated email harvesters?  This
## setting will not affect 'support' and 'admin' emails.
## NOTE: there is no ultimate solution to protect against email
## harvesting.  All modes have drawbacks and can more or less be
## circumvented.  Choose your preferred mode ([t] means "transparent"
## for the user):
##    -1: hide all emails.
## [t] 0: no protection, email returned as is.
##        foo@example.com => foo@example.com
##     1: basic email munging: replaces @ by [at] and . by [dot]
##        foo@example.com => foo [at] example [dot] com
## [t] 2: transparent name mangling: characters are replaced by
##        equivalent HTML entities.
##        foo@example.com => &#102;&#111;&#111;&#64;&#101;&#120;&#97;&#109;&#112;&#108;&#101;&#46;&#99;&#111;&#109;
## [t] 3: javascript insertion.  Requires JavaScript enabled on client
##        side.
##     4: replaces @ and . characters by gif equivalents.
##        foo@example.com => foo [at] example [dot] com
CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE = 2

## CFG_WEBSTYLE_INSPECT_TEMPLATES -- do we want to debug all template
## functions so that they would return HTML results wrapped in
## comments indicating which part of the HTML page was created by which
## template function?  Useful only for debugging Pythonic HTML
## templates.  See the WebStyle Admin Guide for more information.
CFG_WEBSTYLE_INSPECT_TEMPLATES = 0

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP -- eventual global HTML
## left top box:
CFG_WEBSTYLE_CDSPAGEBOXLEFTTOP =

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM -- eventual global
## HTML left bottom box:
CFG_WEBSTYLE_CDSPAGEBOXLEFTBOTTOM =

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP -- eventual global
## HTML right top box:
CFG_WEBSTYLE_CDSPAGEBOXRIGHTTOP =

## (deprecated) CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM -- eventual global
## HTML right bottom box:
CFG_WEBSTYLE_CDSPAGEBOXRIGHTBOTTOM =

## CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST -- when certain HTTP status
## codes are raised to the WSGI handler, the corresponding exceptions
## and error messages can be sent to the system administrator for
## inspection.  This is useful to detect and correct errors.  The
## variable represents a comma-separated list of HTTP statuses that
## should alert the admin.  Wildcards are possible.  If the status is
## followed by an "r", it means that a referer is required to exist
## (useful to distinguish broken known links from URL typos when 404
## errors are raised).
CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = 404r,400,5*,41*
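## For example, a site that additionally wants to be alerted about
## "403 Forbidden" responses, but only when a referer is present,
## might use something like the following in invenio-local.conf (the
## "403r" token is only an illustration of the syntax described
## above):
##
##    CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST = 404r,403r,400,5*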
## CFG_WEBSTYLE_HTTP_USE_COMPRESSION -- whether to enable deflate
## compression of your HTTP/HTTPS connections.  This will affect the Apache
## configuration snippets created by inveniocfg --create-apache-conf and
## the OAI-PMH Identify response.
CFG_WEBSTYLE_HTTP_USE_COMPRESSION = 0

## CFG_WEBSTYLE_REVERSE_PROXY_IPS -- if you are setting up a multinode
## environment where an HTTP proxy such as mod_proxy is sitting in
## front of the Invenio web application and is forwarding requests to
## worker nodes, set here the list of IP addresses of the allowed
## HTTP proxies.  This is needed in order to avoid IP address spoofing
## when worker nodes are also available on the public Internet and
## might receive forged HTTP requests.  Only HTTP requests coming from
## the specified IP addresses will be considered as forwarded from a
## reverse proxy.  E.g. set this to '123.123.123.123'.
CFG_WEBSTYLE_REVERSE_PROXY_IPS =

##################################
## Part 3: WebSearch parameters ##
##################################

## This section contains some configuration parameters for the WebSearch
## module.  Please note that WebSearch is mostly configured at
## run-time via its WebSearch Admin web interface.  The parameters
## below are the ones that you probably do not want to modify very
## often during the runtime.  (Note that you may modify them
## afterwards too, though.)

## CFG_WEBSEARCH_SEARCH_CACHE_SIZE -- how many queries do we want to
## cache in memory per Apache httpd process?  This cache is used
## mainly for the "next/previous page" functionality, but it also caches
## "popular" user queries if more than one user happens to search for
## the same thing.  Note that large numbers may lead to great memory
## consumption.  We recommend a value not greater than 100.
CFG_WEBSEARCH_SEARCH_CACHE_SIZE = 0

## CFG_WEBSEARCH_FIELDS_CONVERT -- if you migrate from an older
## system, you may want to map field codes of your old system (such as
## 'ti') to Invenio/MySQL ones ("title").  Use Python dictionary syntax
## for the translation table, e.g. {'wau':'author', 'wti':'title'}.
## Usually you don't want to do that, and you would use the empty dict {}.
CFG_WEBSEARCH_FIELDS_CONVERT = {}

## CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH -- width of the
## search pattern window in the light search interface, in
## characters.
CFG_WEBSEARCH_LIGHTSEARCH_PATTERN_BOX_WIDTH = 60

## CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH -- width of the search
## pattern window in the simple search interface, in characters.
CFG_WEBSEARCH_SIMPLESEARCH_PATTERN_BOX_WIDTH = 40

## CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH -- width of the
## search pattern window in the advanced search interface, in
## characters.
CFG_WEBSEARCH_ADVANCEDSEARCH_PATTERN_BOX_WIDTH = 30

## CFG_WEBSEARCH_NB_RECORDS_TO_SORT -- how many records do we still
## want to sort?  For higher numbers we print only a warning and won't
## perform any sorting other than the default 'latest records first', as
## sorting would be very time consuming then.  We recommend a value of
## not more than a couple of thousands.
CFG_WEBSEARCH_NB_RECORDS_TO_SORT = 1000

## CFG_WEBSEARCH_CALL_BIBFORMAT -- if a record is being displayed but
## it was not preformatted in the "HTML brief" format, do we want to
## call BibFormat on the fly?  Put "1" for "yes" and "0" for "no".
## Note that "1" will display the record exactly as if it were fully ## preformatted, but it may be slow due to on-the-fly processing; "0" ## will display a default format very fast, but it may not have all ## the fields as in the fully preformatted HTML brief format. Note ## also that this option is active only for old (PHP) formats; the new ## (Python) formats are called on the fly by default anyway, since ## they are much faster. When usure, please set "0" here. CFG_WEBSEARCH_CALL_BIBFORMAT = 0 ## CFG_WEBSEARCH_USE_ALEPH_SYSNOS -- do we want to make old SYSNOs ## visible rather than MySQL's record IDs? You may use this if you ## migrate from a different e-doc system, and you store your old ## system numbers into 970__a. Put "1" for "yes" and "0" for ## "no". Usually you don't want to do that, though. CFG_WEBSEARCH_USE_ALEPH_SYSNOS = 0 ## CFG_WEBSEARCH_I18N_LATEST_ADDITIONS -- Put "1" if you want the ## "Latest Additions" in the web collection pages to show ## internationalized records. Useful only if your brief BibFormat ## templates contains internationalized strings. Otherwise put "0" in ## order not to slow down the creation of latest additions by WebColl. CFG_WEBSEARCH_I18N_LATEST_ADDITIONS = 0 ## CFG_WEBSEARCH_INSTANT_BROWSE -- the number of records to display ## under 'Latest Additions' in the web collection pages. CFG_WEBSEARCH_INSTANT_BROWSE = 10 ## CFG_WEBSEARCH_INSTANT_BROWSE_RSS -- the number of records to ## display in the RSS feed. CFG_WEBSEARCH_INSTANT_BROWSE_RSS = 25 ## CFG_WEBSEARCH_RSS_I18N_COLLECTIONS -- comma-separated list of ## collections that feature an internationalized RSS feed on their ## main seach interface page created by webcoll. Other collections ## will have RSS feed using CFG_SITE_LANG. CFG_WEBSEARCH_RSS_I18N_COLLECTIONS = ## CFG_WEBSEARCH_RSS_TTL -- number of minutes that indicates how long ## a feed cache is valid. CFG_WEBSEARCH_RSS_TTL = 360 ## CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS -- maximum number of request kept ## in cache. If the cache is filled, following request are not cached. CFG_WEBSEARCH_RSS_MAX_CACHED_REQUESTS = 1000 ## CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD -- up to how many author names ## to print explicitely; for more print "et al". Note that this is ## used in default formatting that is seldomly used, as usually ## BibFormat defines all the format. The value below is only used ## when BibFormat fails, for example. CFG_WEBSEARCH_AUTHOR_ET_AL_THRESHOLD = 3 ## CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS -- whether to show or ## not collection grandsons in Narrow Search boxes (sons are shown by ## default, grandsons are configurable here). Use 0 for no and 1 for ## yes. CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS = 1 ## CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX -- shall we ## create help links for Ellis, Nick or Ellis, Nicholas and friends ## when Ellis, N was searched for? Useful if you have one author ## stored in the database under several name formats, namely surname ## comma firstname and surname comma initial cataloging policy. Use 0 ## for no and 1 for yes. CFG_WEBSEARCH_CREATE_SIMILARLY_NAMED_AUTHORS_LINK_BOX = 1 ## CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS -- MathJax is a JavaScript ## library that renders (La)TeX mathematical formulas in the client ## browser. This parameter must contain a comma-separated list of ## output formats for which to apply the MathJax rendering, for example ## "hb,hd". If the list is empty, MathJax is disabled. 
## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT -- when searching
## external collections (e.g. SPIRES, CiteSeer, etc), how many seconds
## do we wait for a reply before abandoning?
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT = 5

## CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS -- how many
## results do we fetch?
CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_MAXRESULTS = 10

## CFG_WEBSEARCH_SPLIT_BY_COLLECTION -- do we want to split the search
## results by collection or not?  Use 0 for no, 1 for yes.
CFG_WEBSEARCH_SPLIT_BY_COLLECTION = 1

## CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS -- the default number of
## records to display per page in the search results pages.
CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS = 10

## CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS -- in order to limit denial of
## service attacks, the total number of records per group displayed as a
## result of a search query will be limited to this number.  Only superuser
## queries will not be affected by this limit.
CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS = 200

## CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL -- logged in users
## might have rights to access some restricted collections.  This variable
## tweaks the kind of support the system will automatically provide to the
## user with respect to searching into these restricted collections.
## Set this to 0 in order to have the user explicitly activate restricted
## collections in order to search into them.  Set this to 1 in order to
## propose to the user the list of restricted collections to which he/she has
## rights (note: this is not yet implemented).  Set this to 2 in order to
## silently add to any query all the restricted collections to which the
## user has rights.
## Note: the system will discover which restricted collections a user has
## rights to at login time.  The time complexity of this procedure is
## proportional to the number of restricted collections.  E.g. for a system
## with ~50 restricted collections, you might expect ~1s of delay in the
## login time, when this variable is set to a value higher than 0.
CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL = 0

## CFG_WEBSEARCH_SHOW_COMMENT_COUNT -- do we want to show the 'N comments'
## links on the search engine pages?  (useful only when you have allowed
## commenting)
CFG_WEBSEARCH_SHOW_COMMENT_COUNT = 1

## CFG_WEBSEARCH_SHOW_REVIEW_COUNT -- do we want to show the 'N reviews'
## links on the search engine pages?  (useful only when you have allowed
## reviewing)
CFG_WEBSEARCH_SHOW_REVIEW_COUNT = 1

## CFG_WEBSEARCH_FULLTEXT_SNIPPETS -- how many full-text snippets to
## display for full-text searches?
CFG_WEBSEARCH_FULLTEXT_SNIPPETS = 4

## CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS -- how many context words
## to display around the pattern in the snippet?
CFG_WEBSEARCH_FULLTEXT_SNIPPETS_WORDS = 4

## CFG_WEBSEARCH_WILDCARD_LIMIT -- some of the queries, wildcard
## queries in particular (ex: cern*, a*), but also regular expressions
## (ex: [a-z]+), may take a long time to respond due to the high
## number of hits.  You can limit the number of terms matched by a
## wildcard by setting this variable.  A negative value or zero means
## that none of the queries will be limited (which may be wanted, but
## is also prone to denial-of-service attacks).
CFG_WEBSEARCH_WILDCARD_LIMIT = 50000
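## For example, to lift the wildcard limit entirely (not recommended
## on public sites, for the denial-of-service reasons mentioned
## above), you would put in invenio-local.conf:
##
##    CFG_WEBSEARCH_WILDCARD_LIMIT = 0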
## CFG_WEBSEARCH_SYNONYM_KBRS -- defines which knowledge bases are to
## be used for which index in order to provide runtime synonym lookup
## of user-supplied terms, and what massaging function should be used
## upon the search pattern before performing the KB lookup.  (Can be one
## of `exact', `leading_to_comma', `leading_to_number'.)
CFG_WEBSEARCH_SYNONYM_KBRS = {
    'journal': ['SEARCH-SYNONYM-JOURNAL', 'leading_to_number'],
}

## CFG_SOLR_URL -- optionally, you may use Solr to serve full-text
## queries.  If so, please specify the URL of your Solr instance.
## Example: http://localhost:8983/solr (default Solr port)
CFG_SOLR_URL =

## CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT -- specify the limit when
## the previous/next/back hit links are to be displayed on detailed record
## pages.  In order to speed up list manipulations, if a search returns
## more hits than this limit, then do not lose time calculating
## next/previous/back hits at all, but display the page directly without
## them.  Note also that installations that do not want the
## next/previous hit link functionality can set this variable to zero
## and not see anything.
CFG_WEBSEARCH_PREV_NEXT_HIT_LIMIT = 1000

## CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY -- when a record belongs to more than one
## restricted collection, if the viewrestrcoll policy is set to "ALL" (default)
## then the user must be authorized to all the restricted collections in
## order to be granted access to the specific record.  If the policy is set to
## "ANY", then the user needs to be authorized to only one of the collections
## in order to be granted access to the specific record.
CFG_WEBSEARCH_VIEWRESTRCOLL_POLICY = ALL

## CFG_WEBSEARCH_SPIRES_SYNTAX -- variable to configure the use of the
## SPIRES query syntax in searches.  Values: 0 = SPIRES syntax is
## switched off; 1 = leading 'find' is required; 9 = leading 'find' is
## not required (leading SPIRES operator, space-operator-space, etc
## are also accepted).
CFG_WEBSEARCH_SPIRES_SYNTAX = 1

## CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS -- when a user search does not
## return any direct result, what do we want to display?  Set to 0 in
## order to display a generic message about the search returning no hits.
## Set to 1 in order to display a list of nearest terms from the indexes
## that may match the user query.  Note: this functionality may be slow,
## so you may want to disable it on bigger sites.
CFG_WEBSEARCH_DISPLAY_NEAREST_TERMS = 1

## CFG_WEBSEARCH_DETAILED_META_FORMAT -- the output format to use for
## detailed meta tags containing metadata as configured in the tag
## table.  The default output format 'hdm' should be left unchanged.
## This format will be included in the header of /record/ pages.  For
## efficiency this format should be pre-cached with BibReformat.  See
## also CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR and
## CFG_WEBSEARCH_ENABLE_OPENGRAPH.
CFG_WEBSEARCH_DETAILED_META_FORMAT = hdm

## CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR -- decides if meta tags for
## Google Scholar shall be included in the detailed record page
## header, when using the standard formatting templates/elements.  See
## also CFG_WEBSEARCH_DETAILED_META_FORMAT and
## CFG_WEBSEARCH_ENABLE_OPENGRAPH.  When this variable is changed and the
## output format defined in CFG_WEBSEARCH_DETAILED_META_FORMAT is
## cached, a bibreformat must be run for the cached records.
CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR = True
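## For instance, after changing this variable on a site where the
## 'hdm' output format is already pre-cached, the cache would be
## refreshed by scheduling a bibreformat run for that format, along
## the lines of (assuming the usual bibreformat command-line syntax;
## check `bibreformat --help' on your installation):
##
##    $ bibreformat -o HDM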
## CFG_WEBSEARCH_ENABLE_OPENGRAPH -- decides if meta tags for the Open
## Graph protocol shall be included in the detailed record page
## header, when using the standard formatting templates/elements.  See
## also CFG_WEBSEARCH_DETAILED_META_FORMAT and
## CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR.  When this variable is changed
## and the output format defined in CFG_WEBSEARCH_DETAILED_META_FORMAT is
## cached, a bibreformat must be run for the cached records.  Note that
## enabling Open Graph produces invalid XHTML/HTML5 markup.
CFG_WEBSEARCH_ENABLE_OPENGRAPH = False

#######################################
## Part 4: BibHarvest OAI parameters ##
#######################################

## This part defines parameters for the Invenio OAI gateway.
## Useful if you are running Invenio as an OAI data provider.

## CFG_OAI_ID_FIELD -- OAI identifier MARC field:
CFG_OAI_ID_FIELD = 909COo

## CFG_OAI_SET_FIELD -- OAI set MARC field:
CFG_OAI_SET_FIELD = 909COp

## CFG_OAI_PREVIOUS_SET_FIELD -- previous OAI set MARC field:
CFG_OAI_PREVIOUS_SET_FIELD = 909COq

## CFG_OAI_DELETED_POLICY -- OAI deletedrecordspolicy
## (no/transient/persistent):
CFG_OAI_DELETED_POLICY = persistent

## CFG_OAI_ID_PREFIX -- OAI identifier prefix:
CFG_OAI_ID_PREFIX = atlantis.cern.ch

## CFG_OAI_SAMPLE_IDENTIFIER -- OAI sample identifier (see also the
## example request below):
CFG_OAI_SAMPLE_IDENTIFIER = oai:atlantis.cern.ch:123

## CFG_OAI_IDENTIFY_DESCRIPTION -- description for the OAI Identify verb:
CFG_OAI_IDENTIFY_DESCRIPTION = <description>
 <eprints xmlns="http://www.openarchives.org/OAI/1.1/eprints"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://www.openarchives.org/OAI/1.1/eprints
                              http://www.openarchives.org/OAI/1.1/eprints.xsd">
  <content>
   <URL>http://atlantis.cern.ch/</URL>
  </content>
  <metadataPolicy>
   <text>Free and unlimited use by anybody with obligation to refer to original record</text>
  </metadataPolicy>
  <dataPolicy>
   <text>Full content, i.e. preprints may not be harvested by robots</text>
  </dataPolicy>
  <submissionPolicy>
   <text>Submission restricted. Submitted documents are subject to approval by OAI repository admins.</text>
  </submissionPolicy>
 </eprints>
</description>

## CFG_OAI_LOAD -- OAI number of records in a response:
CFG_OAI_LOAD = 500

## CFG_OAI_EXPIRE -- OAI resumptionToken expiration time:
CFG_OAI_EXPIRE = 90000

## CFG_OAI_SLEEP -- the service is unavailable between two consecutive
## requests for CFG_OAI_SLEEP seconds:
CFG_OAI_SLEEP = 2

## CFG_OAI_METADATA_FORMATS -- mapping between accepted metadataPrefixes and
## the corresponding output format to use, its schema and its metadataNamespace.
CFG_OAI_METADATA_FORMATS = {
    'marcxml': ('XOAIMARC',
                'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd',
                'http://www.loc.gov/MARC21/slim'),
    'oai_dc': ('XOAIDC',
               'http://www.openarchives.org/OAI/1.1/dc.xsd',
               'http://purl.org/dc/elements/1.1/'),
}

## CFG_OAI_FRIENDS -- comma-separated list of OAI baseURLs of friend
## repositories.
CFG_OAI_FRIENDS = http://cdsweb.cern.ch/oai2d,http://openaire.cern.ch/oai2d,http://export.arxiv.org/oai2
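## As an illustration of the parameters above, a harvester would fetch
## the sample record from this repository with a standard OAI-PMH
## GetRecord request against the /oai2d endpoint, e.g. (using
## CFG_SITE_URL and the sample identifier; adapt to your site):
##
##    http://your.site.com/oai2d?verb=GetRecord&metadataPrefix=oai_dc&identifier=oai:atlantis.cern.ch:123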
## The following subfields are a complement to
## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG.  If CFG_OAI_PROVENANCE_BASEURL_SUBFIELD
## is set for a record, then the corresponding field is considered as
## having been harvested via OAI-PMH.

## CFG_OAI_PROVENANCE_BASEURL_SUBFIELD -- baseURL of the originDescription
## of a record
CFG_OAI_PROVENANCE_BASEURL_SUBFIELD = u

## CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD -- datestamp of the originDescription
## of a record
CFG_OAI_PROVENANCE_DATESTAMP_SUBFIELD = d

## CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD -- metadataNamespace of the
## originDescription of a record
CFG_OAI_PROVENANCE_METADATANAMESPACE_SUBFIELD = m

## CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD -- originDescription of the
## originDescription of a record
CFG_OAI_PROVENANCE_ORIGINDESCRIPTION_SUBFIELD = d

## CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD -- harvestDate of the
## originDescription of a record
CFG_OAI_PROVENANCE_HARVESTDATE_SUBFIELD = h

## CFG_OAI_PROVENANCE_ALTERED_SUBFIELD -- altered flag of the
## originDescription of a record
CFG_OAI_PROVENANCE_ALTERED_SUBFIELD = t

## CFG_OAI_FAILED_HARVESTING_STOP_QUEUE -- when harvesting OAI sources
## fails, shall we report an error with the task and stop the BibSched
## queue, or simply wait for the next run of the task?  A value of 0
## will stop the task upon errors, 1 will let the queue run if the
## next run of the oaiharvest task can safely recover the failure
## (this means that the queue will stop if the task is not set to run
## periodically).
CFG_OAI_FAILED_HARVESTING_STOP_QUEUE = 1

## CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN -- when
## CFG_OAI_FAILED_HARVESTING_STOP_QUEUE is set to leave the queue
## running upon errors, shall we send an email to the admin to notify
## about the failure?
CFG_OAI_FAILED_HARVESTING_EMAILS_ADMIN = True

## NOTE: the following parameters are experimental
## -----------------------------------------------------------------------------
## CFG_OAI_RIGHTS_FIELD -- MARC field dedicated to storing Copyright information
CFG_OAI_RIGHTS_FIELD = 542__

## CFG_OAI_RIGHTS_HOLDER_SUBFIELD -- MARC subfield dedicated to storing the
## Copyright holder information
CFG_OAI_RIGHTS_HOLDER_SUBFIELD = d

## CFG_OAI_RIGHTS_DATE_SUBFIELD -- MARC subfield dedicated to storing the
## Copyright date information
CFG_OAI_RIGHTS_DATE_SUBFIELD = g

## CFG_OAI_RIGHTS_URI_SUBFIELD -- MARC subfield dedicated to storing the URI
## (URL or URN, more detailed statement about copyright status) information
CFG_OAI_RIGHTS_URI_SUBFIELD = u

## CFG_OAI_RIGHTS_CONTACT_SUBFIELD -- MARC subfield dedicated to storing the
## Copyright holder contact information
CFG_OAI_RIGHTS_CONTACT_SUBFIELD = e

## CFG_OAI_RIGHTS_STATEMENT_SUBFIELD -- MARC subfield dedicated to storing the
## Copyright statement as presented on the resource
CFG_OAI_RIGHTS_STATEMENT_SUBFIELD = f

## CFG_OAI_LICENSE_FIELD -- MARC field dedicated to storing terms governing
## use and reproduction (license)
CFG_OAI_LICENSE_FIELD = 540__

## CFG_OAI_LICENSE_TERMS_SUBFIELD -- MARC subfield dedicated to storing the
## Terms governing use and reproduction, e.g. CC License
CFG_OAI_LICENSE_TERMS_SUBFIELD = a

## CFG_OAI_LICENSE_PUBLISHER_SUBFIELD -- MARC subfield dedicated to storing the
## person or institution imposing the license (author, publisher)
CFG_OAI_LICENSE_PUBLISHER_SUBFIELD = b

## CFG_OAI_LICENSE_URI_SUBFIELD -- MARC subfield dedicated to storing the URI
## of the license
CFG_OAI_LICENSE_URI_SUBFIELD = u
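## As an illustration, with the default license field and subfields
## above, a record's license could be expressed in MARCXML as follows
## (the CC-BY-3.0 values are purely illustrative):
##
##    <datafield tag="540" ind1=" " ind2=" ">
##      <subfield code="a">CC-BY-3.0</subfield>
##      <subfield code="b">Example Publisher</subfield>
##      <subfield code="u">http://creativecommons.org/licenses/by/3.0/</subfield>
##    </datafield>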
##------------------------------------------------------------------------------

##################################
## Part 5: WebSubmit parameters ##
##################################

## This section contains some configuration parameters for the WebSubmit
## module.  Please note that WebSubmit is mostly configured at
## run-time via its WebSubmit Admin web interface.  The parameters
## below are the ones that you probably do not want to modify during
## the runtime.

+## CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES -- this is the list of
+## doctypes (like 'Main' or 'Additional') and their descriptions that admins
+## can choose from when adding new files via the Document File Manager
+## admin interface.
+## - When no value is provided, admins cannot add new
+##   files (they can only revise/delete/add formats)
+## - When a single value is given, it is used as
+##   the default doctype for all new documents
+##
+## Order is relevant.
+## E.g.:
+## [('main', 'Main document'), ('additional', 'Figure, schema, etc.')]
+CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES = [
+    ('Main', 'Main document'),
+    ('LaTeX', 'LaTeX'),
+    ('Source', 'Source'),
+    ('Additional', 'Additional File'),
+    ('Audio', 'Audio file'),
+    ('Video', 'Video file'),
+    ('Script', 'Script'),
+    ('Data', 'Data'),
+    ('Figure', 'Figure'),
+    ('Schema', 'Schema'),
+    ('Graph', 'Graph'),
+    ('Image', 'Image'),
+    ('Drawing', 'Drawing'),
+    ('Slides', 'Slides')]
+
+## CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS -- this is the
+## list of restrictions (like 'Restricted' or 'No Restriction') and their
+## descriptions that admins can choose from when adding or revising files.
+## Restrictions can then be configured at the level of WebAccess.
+## - When no value is provided, no restriction is
+##   applied
+## - When a single value is given, it is used as
+##   the default restriction for all documents.
+## - The first value of the list is used as the default
+##   restriction if the user is not given the
+##   choice of the restriction.  Order is relevant.
+##
+## E.g.:
+## [('', 'No restriction'), ('restr', 'Restricted')]
+CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS = [
+    ('', 'Public'),
+    ('restricted', 'Restricted')]
+
+## CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC -- set here the other
+## default flags and attributes to tune the Document File Manager admin
+## interface.
+## See the docstring of websubmit_managedocfiles.create_file_upload_interface
+## for a description of the available parameters and their syntax.
+## In general you will rarely need to change this variable.
+CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC = {
+    'can_revise_doctypes': ['*'],
+    'can_comment_doctypes': ['*'],
+    'can_describe_doctypes': ['*'],
+    'can_delete_doctypes': ['*'],
+    'can_keep_doctypes': ['*'],
+    'can_rename_doctypes': ['*'],
+    'can_add_format_to_doctypes': ['*'],
+    'can_restrict_doctypes': ['*'],
+    }
+
+
## CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT -- the fulltext
## documents are stored under "/opt/invenio/var/data/files/gX/Y"
## directories where X is 0,1,... and Y stands for the bibdoc ID.  Thus
## documents Y are grouped into directories X and this variable
## indicates the maximum number of documents Y stored in each
## directory X.
## This limit is imposed solely for filesystem performance reasons in
## order not to have too many subdirectories in a given directory.
CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT = 5000

## CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS -- a comma-separated
## list of document extensions not listed in the Python standard mimetype
## library that should be recognized by Invenio.
CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS = hpg,link,lis,llb,mat,mpp,msg,docx,docm,xlsx,xlsm,xlsb,pptx,pptm,ppsx,ppsm

## CFG_WEBSUBMIT_DESIRED_CONVERSIONS -- a dictionary having as keys
## a format and as values the corresponding list of desired converted
## formats.
CFG_WEBSUBMIT_DESIRED_CONVERSIONS = {
    'pdf': ('pdf;pdfa',),
    'ps.gz': ('pdf;pdfa',),
    'djvu': ('pdf',),
    'sxw': ('doc', 'odt', 'pdf;pdfa',),
    'docx': ('doc', 'odt', 'pdf;pdfa',),
    'doc': ('odt', 'pdf;pdfa', 'docx'),
    'rtf': ('pdf;pdfa', 'odt',),
    'odt': ('pdf;pdfa', 'doc',),
    'pptx': ('ppt', 'odp', 'pdf;pdfa',),
    'ppt': ('odp', 'pdf;pdfa', 'pptx'),
    'sxi': ('odp', 'pdf;pdfa',),
    'odp': ('pdf;pdfa', 'ppt',),
    'xlsx': ('xls', 'ods', 'csv'),
    'xls': ('ods', 'csv'),
    'ods': ('xls', 'xlsx', 'csv'),
    'sxc': ('xls', 'xlsx', 'csv'),
    'tiff': ('pdf;pdfa',),
    'tif': ('pdf;pdfa',),}

## CFG_BIBDOCFILE_USE_XSENDFILE -- if your web server supports the
## XSendfile header, you may want to enable this feature in order for
## Invenio to tell the web server to stream files for download (after
## proper authorization checks) by the web server's means.  This helps to
## liberate Invenio worker processes from being busy with sending big
## files to clients.  The web server will take care of that.  Note:
## this feature is still somewhat experimental.  Note: when enabled
## (set to 1), then you have to also regenerate the Apache vhost conf
## snippets (inveniocfg --update-config-py --create-apache-conf).
CFG_BIBDOCFILE_USE_XSENDFILE = 0

## CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY -- a number between 0 and
## 1 that indicates the probability with which the MD5 checksum will be
## verified when streaming bibdocfile-managed files.  (0.1 will cause
## the check to be performed once for every 10 downloads)
CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY = 0.1

## CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM -- a comma-separated
## list of document extensions in descending order of preference
## to suggest what is considered the best format to extract text from.
CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM = ('txt', 'html', 'xml', 'odt', 'doc', 'docx', 'djvu', 'pdf', 'ps', 'ps.gz')

## CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE -- whether to use the
## database table bibdocfsinfo as reference for filesystem
## information.  The default is 0.  Switch this to 1
## after you have run bibdocfile --fix-bibdocfsinfo-cache
## or on an empty system.
CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE = 0

## CFG_OPENOFFICE_SERVER_HOST -- the host where an OpenOffice server is
## listening.  If set to localhost, an OpenOffice server will be started
## automatically if it is not already running.
## Note: if you set this to an empty value this will disable the usage of
## OpenOffice for converting documents.
## If you set this to something different than localhost you'll have to take
## care to have an OpenOffice server running on the corresponding host and
## to install the same OpenOffice release both on the client and on the server
## side.
## In order to launch an OpenOffice server on a remote machine, just start
## the usual 'soffice' executable in this way:
## $> soffice -headless -nologo -nodefault -norestore -nofirststartwizard \
##      -accept=socket,host=HOST,port=PORT;urp;StarOffice.ComponentContext
CFG_OPENOFFICE_SERVER_HOST = localhost

## CFG_OPENOFFICE_SERVER_PORT -- the port where an OpenOffice server is
## listening.
CFG_OPENOFFICE_SERVER_PORT = 2002

## CFG_OPENOFFICE_USER -- the user that will be used to launch the OpenOffice
## client.  It is recommended to set this to a user who doesn't own files,
## e.g. 'nobody'.  You should also authorize your Apache server user to be
## able to become this user, e.g. by adding to your /etc/sudoers the following
## line:
## "apache ALL=(nobody) NOPASSWD: ALL"
## provided that apache is the username corresponding to the Apache user.
## On some machines this might be apache2 or www-data.
CFG_OPENOFFICE_USER = nobody

#################################
## Part 6: BibIndex parameters ##
#################################

## This section contains some configuration parameters for the BibIndex
## module.  Please note that BibIndex is mostly configured at run-time
## via its BibIndex Admin web interface.  The parameters below are the
## ones that you probably do not want to modify very often during the
## runtime.

## CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY -- when fulltext indexing, do
## you want to index locally stored files only, or also external URLs?
## Use "0" to say "no" and "1" to say "yes".
CFG_BIBINDEX_FULLTEXT_INDEX_LOCAL_FILES_ONLY = 1

## CFG_BIBINDEX_REMOVE_STOPWORDS -- when indexing, do we want to remove
## stopwords?  Use "0" to say "no" and "1" to say "yes".
CFG_BIBINDEX_REMOVE_STOPWORDS = 0

## CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS -- characters considered as
## alphanumeric separators of word-blocks inside words.  You probably
## don't want to change this.
CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS = \!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~

## CFG_BIBINDEX_CHARS_PUNCTUATION -- characters considered as punctuation
## between word-blocks inside words.  You probably don't want to
## change this.
CFG_BIBINDEX_CHARS_PUNCTUATION = \.\,\:\;\?\!\"

## CFG_BIBINDEX_REMOVE_HTML_MARKUP -- should we attempt to remove HTML markup
## before indexing?  Use 1 if you have HTML markup inside metadata
## (e.g. in abstracts), use 0 otherwise.
CFG_BIBINDEX_REMOVE_HTML_MARKUP = 0

## CFG_BIBINDEX_REMOVE_LATEX_MARKUP -- should we attempt to remove LaTeX markup
## before indexing?  Use 1 if you have LaTeX markup inside metadata
## (e.g. in abstracts), use 0 otherwise.
CFG_BIBINDEX_REMOVE_LATEX_MARKUP = 0

## CFG_BIBINDEX_MIN_WORD_LENGTH -- minimum word length allowed to be added to
## the index.  Terms shorter than this will be discarded.
## Useful to keep the database clean; however, you can safely leave
## this value at 0 for up to 1,000,000 documents.
CFG_BIBINDEX_MIN_WORD_LENGTH = 0

## CFG_BIBINDEX_URLOPENER_USERNAME and CFG_BIBINDEX_URLOPENER_PASSWORD --
## access credentials to access restricted URLs, interesting only if
## you are fulltext-indexing files located on a remote server that is
## only available via username/password.  But it's probably better to
## handle this case via IP or some convention; the current scheme is
## mostly there for demo only.
CFG_BIBINDEX_URLOPENER_USERNAME = mysuperuser
CFG_BIBINDEX_URLOPENER_PASSWORD = mysuperpass

## CFG_INTBITSET_ENABLE_SANITY_CHECKS -- enable sanity checks for
## integers passed to the intbitset data structures.  It is good to
## enable this during debugging and to disable it for speed
## improvements.
CFG_INTBITSET_ENABLE_SANITY_CHECKS = False

## CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES -- regular expression that matches
## docnames for which OCR is desired (set this to .* in order to enable
## OCR in general, set this to empty in order to disable it).
CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES = scan-.*

## CFG_BIBINDEX_SPLASH_PAGES -- key-value mapping where the key corresponds
## to a regular expression that matches the URLs of the splash pages of
## a given service and the value is a regular expression of the set of URLs
## referenced via tags in the HTML content of the splash pages that are
## referring to documents that need to be indexed.
## NOTE: for backward compatibility reasons you can set this to a simple
## regular expression that will directly be used as the unique key of the
## map, with corresponding value set to ".*" (in order to match any URL).
CFG_BIBINDEX_SPLASH_PAGES = {
    "http://documents\.cern\.ch/setlink\?.*": ".*",
    "http://ilcagenda\.linearcollider\.org/subContributionDisplay\.py\?.*|http://ilcagenda\.linearcollider\.org/contributionDisplay\.py\?.*":
        "http://ilcagenda\.linearcollider\.org/getFile\.py/access\?.*|http://ilcagenda\.linearcollider\.org/materialDisplay\.py\?.*",
}

## CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES -- do we want
## the author word index to exclude first names to keep only last
## names?  If set to True, then for the author `Bernard, Denis', only
## `Bernard' will be indexed in the word index, not `Denis'.  Note
## that if you change this variable, you have to re-index the author
## index via `bibindex -w author -R'.
CFG_BIBINDEX_AUTHOR_WORD_INDEX_EXCLUDE_FIRST_NAMES = False

## CFG_BIBINDEX_SYNONYM_KBRS -- defines which knowledge bases are to
## be used for which index in order to provide index-time synonym
## lookup, and what massaging function should be used upon the search
## pattern before performing the KB lookup.  (Can be one of `exact',
## `leading_to_comma', `leading_to_number'.)
CFG_BIBINDEX_SYNONYM_KBRS = {
    'global': ['INDEX-SYNONYM-TITLE', 'exact'],
    'title': ['INDEX-SYNONYM-TITLE', 'exact'],
}

#######################################
## Part 7: Access control parameters ##
#######################################

## This section contains some configuration parameters for the access
## control system.  Please note that WebAccess is mostly configured at
## run-time via its WebAccess Admin web interface.  The parameters
## below are the ones that you probably do not want to modify very
## often during the runtime.  (If you do want to modify them during
## runtime, for example to deny access temporarily because of backups,
## you can edit access_control_config.py directly, no need to get back
## here and no need to redo the make process.)

## CFG_ACCESS_CONTROL_LEVEL_SITE -- defines how open this site is.
## Use 0 for normal operation of the site, 1 for read-only site (all
## write operations temporarily closed), 2 for site fully closed,
## 3 for also disabling any database connection.
## Useful for site maintenance (see the example below).
CFG_ACCESS_CONTROL_LEVEL_SITE = 0

## CFG_ACCESS_CONTROL_LEVEL_GUESTS -- guest users access policy.  Use
## 0 to allow guest users, 1 not to allow them (all users must login).
CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0
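## For example, to put the site into read-only mode while performing
## a backup, you could temporarily set (here or directly in
## access_control_config.py, as noted above):
##
##    CFG_ACCESS_CONTROL_LEVEL_SITE = 1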
## CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS -- account registration and
## activation policy.  When 0, users can register and accounts are
## automatically activated.  When 1, users can register but the admin must
## activate the accounts.  When 2, users cannot register nor update
## their email address, only the admin can register accounts.  When 3,
## users cannot register nor update email address nor password, only the
## admin can register accounts.  When 4, the same as 3 applies, plus the
## user cannot change his/her login method.  When 5, the same as 4
## applies, plus info about how to get an account is hidden from the
## login page.
CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0

## CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN -- limit account
## registration to certain email addresses?  If wanted, give the domain
## name below, e.g. "cern.ch".  If not wanted, leave it empty.
CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN =

## CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS -- send a
## notification email to the administrator when a new account is
## created?  Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT -- send a
## notification email to the user when a new account is created, in order
## to verify the validity of the provided email address?  Use
## 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION -- send a
## notification email to the user when a new account is activated?
## Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0

## CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION -- send a
## notification email to the user when a new account is deleted or
## an account request is rejected?  Use 0 for no, 1 for yes.
CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0

## CFG_APACHE_PASSWORD_FILE -- the file where Apache user credentials
## are stored.  Must be an absolute pathname.  If the value does not
## start with a slash, it is considered to be the filename of a file
## located under the prefix/var/tmp directory.  This is useful for the
## demo site testing purposes.  For the production site, if you plan
## to restrict access to some collections based on the Apache user
## authentication mechanism, you should put here an absolute path to
## your Apache password file.
CFG_APACHE_PASSWORD_FILE = demo-site-apache-user-passwords

## CFG_APACHE_GROUP_FILE -- the file where Apache user groups are
## defined.  See the documentation of the preceding config variable.
CFG_APACHE_GROUP_FILE = demo-site-apache-user-groups

###################################
## Part 8: WebSession parameters ##
###################################

## This section contains some configuration parameters for tweaking
## session handling.

## CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT -- number of days after which a session
## and the corresponding cookie is considered expired.
CFG_WEBSESSION_EXPIRY_LIMIT_DEFAULT = 2

## CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER -- number of days after which a session
## and the corresponding cookie is considered expired, when the user has
## requested to permanently stay logged in.
CFG_WEBSESSION_EXPIRY_LIMIT_REMEMBER = 365

## CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS -- when a user requested
## a password reset, for how many days is the URL valid?
CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS = 3

## CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS -- when an account
## activation email was sent, for how many days is the URL valid?
CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS = 3

## CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS -- when a
## user does not confirm his email address and does not complete
## registration, after how many days will it expire?
CFG_WEBSESSION_NOT_CONFIRMED_EMAIL_ADDRESS_EXPIRE_IN_DAYS = 10

## CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS -- when set to 1, the session
## system allocates the same uid=0 to all guest users regardless of where they
## come from.  0 allocates a unique uid to each guest.
CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS = 0

## CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS -- to prevent session cookie
## stealing, Invenio checks that the IP address of a connection is the
## same as that of the connection which created the initial session.
## This variable lets you decide how many bits should be skipped during
## this check.  Set this to 0 in order to enable full IP address
## checking.  Set this to 32 in order to disable IP address checking.
## Intermediate values (say 8) let you have some degree of security so
## that you can trust your local network only while helping to solve
## issues related to outside clients that configured their browser to
## use a web proxy for HTTP connections but not for HTTPS, thus
## potentially having two different IP addresses.  (With 8 skipped bits,
## for example, only the first 24 bits are compared, so clients
## 192.168.1.5 and 192.168.1.200 would be treated as the same address.)
## In general, if you use HTTPS in order to serve authenticated content,
## you can safely set CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS to 32.
CFG_WEBSESSION_IPADDR_CHECK_SKIP_BITS = 0

################################
## Part 9: BibRank parameters ##
################################

## This section contains some configuration parameters for the ranking
## system.

## CFG_BIBRANK_SHOW_READING_STATS -- do we want to show reading
## similarity stats?  ('People who viewed this page also viewed')
CFG_BIBRANK_SHOW_READING_STATS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_STATS -- do we want to show the download
## similarity stats?  ('People who downloaded this document also
## downloaded')
CFG_BIBRANK_SHOW_DOWNLOAD_STATS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS -- do we want to show the download
## history graph?  (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS = 1

## CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION -- do we
## want to show a graph representing the distribution of client IPs
## downloading a given document?  (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_DOWNLOAD_GRAPHS_CLIENT_IP_DISTRIBUTION = 0

## CFG_BIBRANK_SHOW_CITATION_LINKS -- do we want to show the 'Cited
## by' links?  (useful only when you have citations in the metadata)
CFG_BIBRANK_SHOW_CITATION_LINKS = 1

## CFG_BIBRANK_SHOW_CITATION_STATS -- do we want to show citation
## stats?  ('Cited by M records', 'Co-cited with N records')
CFG_BIBRANK_SHOW_CITATION_STATS = 1

## CFG_BIBRANK_SHOW_CITATION_GRAPHS -- do we want to show the citation
## history graph?  (0=no | 1=classic/gnuplot | 2=flot)
CFG_BIBRANK_SHOW_CITATION_GRAPHS = 1

####################################
## Part 10: WebComment parameters ##
####################################

## This section contains some configuration parameters for the
## commenting and reviewing facilities.

## CFG_WEBCOMMENT_ALLOW_COMMENTS -- do we want to allow users to write
## public comments on records?
CFG_WEBCOMMENT_ALLOW_COMMENTS = 1

## CFG_WEBCOMMENT_ALLOW_REVIEWS -- do we want to allow users to write
## public reviews of records?
CFG_WEBCOMMENT_ALLOW_REVIEWS = 1

## CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS -- do we want to allow short
## reviews, that is just the attribution of stars without submitting
## detailed review text?
CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS = 0

## CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN -- if users
## report a comment as abusive, how many reports have to be made before
## the site admin is alerted?
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN = 5

## CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW -- how many comments do
## we display by default in the detailed record page?
CFG_WEBCOMMENT_NB_COMMENTS_IN_DETAILED_VIEW = 1

## CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW -- how many reviews do
## we display by default in the detailed record page?
CFG_WEBCOMMENT_NB_REVIEWS_IN_DETAILED_VIEW = 1

## CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL -- do we notify the site
## admin after every comment?
CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL = 1

## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS -- how many
## elapsed seconds do we consider enough when checking for possible
## multiple comment submissions by a user?
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS = 20

## CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS -- how many
## elapsed seconds do we consider enough when checking for possible
## multiple review submissions by a user?
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS = 20

## CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG
## Javascript-based editor when the user edits comments?
CFG_WEBCOMMENT_USE_RICH_TEXT_EDITOR = False

## CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL -- the email address from which the
## alert emails will appear to be sent:
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL = info@invenio-software.org

## CFG_WEBCOMMENT_DEFAULT_MODERATOR -- if no rules are
## specified to indicate who is the comment moderator of
## a collection, this person will be used as default
CFG_WEBCOMMENT_DEFAULT_MODERATOR = info@invenio-software.org

## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS -- do we want to allow the use
## of the MathJax plugin to render LaTeX input in comments?
CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS = 1

## CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION -- allow a comment author to
## delete his/her own comment?
CFG_WEBCOMMENT_AUTHOR_DELETE_COMMENT_OPTION = 1

# CFG_WEBCOMMENT_EMAIL_REPLIES_TO -- which field of the record defines
# email addresses that should be notified of newly submitted comments,
# and for which collection.  Use collection names as keys, and lists of
# tags as values.
CFG_WEBCOMMENT_EMAIL_REPLIES_TO = {
    'Articles': ['506__d', '506__m'],
}

# CFG_WEBCOMMENT_RESTRICTION_DATAFIELD -- which field of the record
# defines the restriction (must be linked to the WebAccess
# 'viewrestrcomment' action) to apply to newly submitted comments, and for
# which collection.  Use collection names as keys, and one tag as value.
CFG_WEBCOMMENT_RESTRICTION_DATAFIELD = {
    'Articles': '5061_a',
    'Pictures': '5061_a',
    'Theses': '5061_a',
}

# CFG_WEBCOMMENT_ROUND_DATAFIELD -- which field of the record defines
# the current round of comments for which collection.  Use collection
# names as keys, and one tag as value.
CFG_WEBCOMMENT_ROUND_DATAFIELD = {
    'Articles': '562__c',
    'Pictures': '562__c',
}

# CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE -- max file size per attached
# file, in bytes (5242880 bytes = 5 MB).  Choose 0 if you don't want
# to limit the size.
CFG_WEBCOMMENT_MAX_ATTACHMENT_SIZE = 5242880

# CFG_WEBCOMMENT_MAX_ATTACHED_FILES -- maximum number of files that can
# be attached per comment.  Choose 0 if you don't want to limit the
# number of files.  File uploads can be restricted with the action
# "attachcommentfile".
# CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH -- how many levels of # indentation discussions can have. This can be used to ensure that # discussions will not go into deep levels of nesting if users don't # understand the difference between "reply to comment" and "add # comment". When the depth is reached, any "reply to comment" is # conceptually converted to a "reply to thread" (i.e. a reply to this # parent's comment). Use -1 for no limit, 0 for unthreaded (flat) # discussions. CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH = 1 ################################## ## Part 11: BibSched parameters ## ################################## ## This section contains some configuration parameters for the ## bibliographic task scheduler. ## CFG_BIBSCHED_REFRESHTIME -- how often do we want to refresh the ## bibsched monitor? (in seconds) CFG_BIBSCHED_REFRESHTIME = 5 ## CFG_BIBSCHED_LOG_PAGER -- what pager to use to view bibsched task ## logs? CFG_BIBSCHED_LOG_PAGER = /usr/bin/less ## CFG_BIBSCHED_EDITOR -- what editor to use to edit the MARCXML ## code of locked records? CFG_BIBSCHED_EDITOR = /usr/bin/vim ## CFG_BIBSCHED_GC_TASKS_OLDER_THAN -- after how many days to run the ## garbage collector on the BibSched queue (i.e. removing tasks or moving ## them to the archive). CFG_BIBSCHED_GC_TASKS_OLDER_THAN = 30 ## CFG_BIBSCHED_GC_TASKS_TO_REMOVE -- list of BibTasks that can be safely ## removed from the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_REMOVE = bibindex,bibreformat,webcoll,bibrank,inveniogc ## CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE -- list of BibTasks that should be safely ## archived out of the BibSched queue once they are DONE. CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE = bibupload,oaiarchive ## CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS -- maximum number of BibTasks ## that can run concurrently. ## NOTE: concurrent tasks are still considered an experimental ## feature. Please keep this value set to 1 on production environments. CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS = 1 ## CFG_BIBSCHED_PROCESS_USER -- bibsched and bibtask processes must ## usually run under the same identity as the Apache web server ## process in order to share proper file read/write privileges. If ## you want to force some other bibsched/bibtask user, e.g. because ## you are using a local `invenio' user that belongs to your ## `www-data' Apache user group and thus shares writing rights with your ## Apache web server process, then please set its username ## identity here. Otherwise we shall check whether your ## bibsched/bibtask processes are run under the same identity as your ## Apache web server process (in which case you can leave the default ## empty value here). CFG_BIBSCHED_PROCESS_USER = ## CFG_BIBSCHED_NODE_TASKS -- specific nodes may be configured to ## run only specific tasks; if you want this, then this variable is a ## dictionary of the form {'hostname1': ['task1', 'task2']}. The ## default is that any node can run any task. CFG_BIBSCHED_NODE_TASKS = {} ## CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY -- maximum number of archived ## tasks displayed. CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY = 500 ################################### ## Part 12: WebBasket parameters ## ################################### ## CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS -- a safety limit for ## the maximum number of displayed baskets. CFG_WEBBASKET_MAX_NUMBER_OF_DISPLAYED_BASKETS = 20 ## CFG_WEBBASKET_USE_RICH_TEXT_EDITOR -- enable the WYSIWYG ## Javascript-based editor when a user edits comments in WebBasket?
CFG_WEBBASKET_USE_RICH_TEXT_EDITOR = False ################################## ## Part 13: WebAlert parameters ## ################################## ## This section contains some configuration parameters for the ## automatic email notification alert system. ## CFG_WEBALERT_ALERT_ENGINE_EMAIL -- the email address from which the ## alert emails will appear to be sent: CFG_WEBALERT_ALERT_ENGINE_EMAIL = info@invenio-software.org ## CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL -- how many records ## at most do we send in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_RECORDS_IN_ALERT_EMAIL = 20 ## CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL -- number of ## chars per line in an outgoing alert email? CFG_WEBALERT_MAX_NUM_OF_CHARS_PER_LINE_IN_ALERT_EMAIL = 72 ## CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES -- when sending alert ## emails fails, how many times do we retry? CFG_WEBALERT_SEND_EMAIL_NUMBER_OF_TRIES = 3 ## CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES -- when sending ## alert emails fails, what is the sleep time between tries? (in ## seconds) CFG_WEBALERT_SEND_EMAIL_SLEEPTIME_BETWEEN_TRIES = 300 #################################### ## Part 14: WebMessage parameters ## #################################### ## CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE -- how large a web message do we ## allow? CFG_WEBMESSAGE_MAX_SIZE_OF_MESSAGE = 20000 ## CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES -- how many messages do we allow ## in a regular user's inbox? CFG_WEBMESSAGE_MAX_NB_OF_MESSAGES = 30 ## CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS -- after how many days do ## we delete orphaned messages? CFG_WEBMESSAGE_DAYS_BEFORE_DELETE_ORPHANS = 60 ################################## ## Part 15: MiscUtil parameters ## ################################## ## CFG_MISCUTIL_SQL_USE_SQLALCHEMY -- whether to use SQLAlchemy.pool ## in the DB engine of Invenio. It is okay to enable this flag ## even if you have not installed SQLAlchemy. Note that Invenio will ## lose some performance if this option is enabled. CFG_MISCUTIL_SQL_USE_SQLALCHEMY = False ## CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT -- how many queries can we run ## inside run_sql_many() in one SQL statement? The limit value ## depends on MySQL's max_allowed_packet configuration. CFG_MISCUTIL_SQL_RUN_SQL_MANY_LIMIT = 10000 ## CFG_MISCUTIL_SMTP_HOST -- which server to use as the outgoing mail server ## to send the emails generated by the system, for example concerning ## submissions or email notification alerts. CFG_MISCUTIL_SMTP_HOST = localhost ## CFG_MISCUTIL_SMTP_PORT -- which port to use on the outgoing mail server ## defined in the previous step. CFG_MISCUTIL_SMTP_PORT = 25
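## For instance, to relay outgoing mail through a dedicated mail gateway
## (hypothetical host name and port), one could set in invenio-local.conf:
## CFG_MISCUTIL_SMTP_HOST = smtp.example.org
## CFG_MISCUTIL_SMTP_PORT = 587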
## CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT -- the default number of seconds after ## which a process launched through shellutils.run_process_with_timeout will ## be killed. This is useful to catch runaway processes. CFG_MISCUTIL_DEFAULT_PROCESS_TIMEOUT = 300 ## CFG_MATHJAX_HOSTING -- if you plan to use MathJax to display TeX ## formulas on HTML web pages, you can specify whether you wish to use ## 'local' hosting or 'cdn' hosting of MathJax libraries. (If set to ## 'local', you have to run 'make install-mathjax-plugin' as described ## in the INSTALL guide.) If set to 'local', users will use your site ## to download MathJax sources. If set to 'cdn', users will use ## centralized MathJax CDN servers instead. Please note that using ## the CDN is suitable only for small institutions or for MathJax ## sponsors; see the MathJax website for more details. (Also, please ## note that if you plan to use MathJax on your site, you have to ## adapt the CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS and ## CFG_WEBCOMMENT_USE_MATHJAX_IN_COMMENTS configuration variables ## elsewhere in this file.) CFG_MATHJAX_HOSTING = local ################################# ## Part 16: BibEdit parameters ## ################################# ## CFG_BIBEDIT_TIMEOUT -- when a user edits a record, this record is ## locked to prevent other users from editing it at the same time. ## How many seconds of inactivity before the locked record becomes free ## again for other people to edit? CFG_BIBEDIT_TIMEOUT = 3600 ## CFG_BIBEDIT_LOCKLEVEL -- when there is a pending bibupload task in the ## queue for a record, a user shouldn't be permitted to edit that record. ## The lock level determines how thoroughly the queue should be investigated ## to determine if this is the case. ## Level 0 - always permits editing, doesn't look at the queue ## (unsafe, use only if you know what you are doing) ## Level 1 - permits editing if there are no queued bibedit tasks for this record ## (safe with respect to bibedit, but not for other bibupload maintenance jobs) ## Level 2 - permits editing if there are no queued bibupload tasks of any sort ## (safe, but may lock more than necessary if many cataloguers are around) ## Level 3 - permits editing if no queued bibupload task concerns the given record ## (safe, most precise locking, but slow; ## checks for 001/EXTERNAL_SYSNO_TAG/EXTERNAL_OAIID_TAG) ## The recommended level is 3 (default) or 2 (if you use maintenance jobs often). CFG_BIBEDIT_LOCKLEVEL = 3 ## CFG_BIBEDIT_PROTECTED_FIELDS -- a comma-separated list of fields that BibEdit ## will not allow to be added, edited or deleted. Wildcards are not supported, ## but conceptually a wildcard is added at the end of every field specification. ## Examples: ## 500A - protect all MARC fields with tag 500 and first indicator A ## 5 - protect all MARC fields in the 500-series ## 909C_a - protect subfield a in tag 909 with first indicator C and empty ## second indicator ## Note that 001 is protected by default, but if protection of other ## identifiers or automated fields is a requirement, they should be added to ## this list. CFG_BIBEDIT_PROTECTED_FIELDS = ## CFG_BIBEDIT_QUEUE_CHECK_METHOD -- how do we want to check for ## possible queue locking situations to prevent cataloguers from ## editing a record that may be waiting in the queue? Use 'bibrecord' ## for exact checking (always works, but may be slow), use 'regexp' ## for regular expression based checking (very fast, but may be ## inaccurate). When unsure, use 'bibrecord'. CFG_BIBEDIT_QUEUE_CHECK_METHOD = bibrecord ## CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE -- a dictionary ## specifying which collections will be extended with a given template ## while being displayed in the BibEdit UI. CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE = { 'Poetry' : 'poem'} ## CFG_BIBEDIT_KB_SUBJECTS - Name of the KB used in the field 65017a ## to automatically convert codes into their extended version, e.g. ## a - Astrophysics CFG_BIBEDIT_KB_SUBJECTS = Subjects ## CFG_BIBEDIT_KB_INSTITUTIONS - Name of the KB used for institution ## autocomplete.
To be applied in the fields defined in ## CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS. CFG_BIBEDIT_KB_INSTITUTIONS = InstitutionsCollection ## CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS - list of fields to ## be autocompleted with the KB CFG_BIBEDIT_KB_INSTITUTIONS CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS = 100__u,700__u,701__u,502__c ## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING -- maximum number of records ## that can be modified instantly using the multi-record editor. Above ## this limit, modifications will only be executed in limited hours. CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING = 2000 ## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING -- maximum number of records ## that can be sent for modification without having the superadmin role. ## If the number of records is between CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING ## and this number, the modifications will take place only in limited hours. CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING = 20000 ## CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME -- allowed time to ## execute modifications on records, when the number exceeds ## CFG_BIBEDITMULTI_LIMIT_INSTANT_PROCESSING. CFG_BIBEDITMULTI_LIMIT_DELAYED_PROCESSING_TIME = 22:00-05:00 ################################### ## Part 17: BibUpload parameters ## ################################### ## CFG_BIBUPLOAD_REFERENCE_TAG -- where do we store references? CFG_BIBUPLOAD_REFERENCE_TAG = 999 ## CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG -- where do we store external ## system numbers? Useful for matching when our records come from an ## external digital library system. CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG = 970__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG -- where do we store the OAI IDs ## of harvested records? Useful for matching when we harvest records ## via OAI that we do not want to reexport via Invenio OAI; such records ## may have only the source OAI ID stored in this tag (kind of like an ## external system number too). CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG = 035__a ## CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG -- where do we store the OAI SRC ## of harvested records? Useful for matching when we harvest records ## via OAI that we do not want to reexport via Invenio OAI; such records ## may have only the source OAI SRC stored in this tag (kind of like an ## external system number too). Note that the field should be the same as ## CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG. CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG = 035__9 ## CFG_BIBUPLOAD_STRONG_TAGS -- a comma-separated list of tags that ## are strong enough to resist the replace mode. Useful for tags that ## might be created from an external non-metadata-like source, ## e.g. the information about the number of copies left. CFG_BIBUPLOAD_STRONG_TAGS = 964 ## CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -- a comma-separated list ## of tags that contain provenance information that should be checked ## in the bibupload correct mode via matching provenance codes. (Only ## field instances of the same provenance information would be acted ## upon.) Please specify the whole tag info up to subfield codes. CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS = 6531_9 ## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS -- a comma-separated list of system ## paths from which it is allowed to take fulltexts that will be uploaded via ## FFT (CFG_TMPDIR is included by default). CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home
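## For instance, to additionally allow fulltexts to be ingested from a
## shared staging area (hypothetical path), one could set:
## CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS = /tmp,/home,/data/staging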
## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS -- a dictionary containing external ## URLs that can be accessed by Invenio and the specific HTTP headers that will ## be used for each URL. ## The keys of the dictionary are regular expressions matching a set of URLs, ## the values are dictionaries of headers as consumed by urllib2.Request. If a ## regular expression matching all URLs is created at the end of the list, it ## means that Invenio will download all URLs. Otherwise Invenio will only ## download authorized URLs. +## Note: by default, a User-Agent built from the current Invenio version, +## site name, and site URL will be used. ## CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [ ## ('http://myurl.com/.*', {'User-Agent': 'Me'}), ## ('http://yoururl.com/.*', {'User-Agent': 'You', 'Accept': 'text/plain'}), ## ('http://.*', {'User-Agent': 'Invenio'}), ## ] CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [ - ('http(s)?://.*', {'User-Agent': 'Invenio'}), + ('http(s)?://.*', {}), ] ## CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE -- do we want to serialize ## the internal representation of records (Pythonic record structure) into ## the database? This can improve the internal processing speed of some ## operations at the price of somewhat bigger disk space usage. ## If you change this value after some records have already been added ## to your installation, you may want to run: ## $ /opt/invenio/bin/inveniocfg --reset-recstruct-cache ## in order to either erase the cache thus freeing database space, ## or to fill the cache for all records that have not been cached yet. CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE = 1 ## CFG_BIBUPLOAD_DELETE_FORMATS -- which formats do we want bibupload ## to delete when a record is ingested? Enter a comma-separated list of ## formats. For example, 'hb,hd' will delete the pre-formatted HTML brief ## and detailed formats from the cache, so that the search engine will ## generate them on the fly. Useful for always presenting the latest record ## data upon display, until the periodical bibreformat job runs next and ## updates the cache. CFG_BIBUPLOAD_DELETE_FORMATS = hb ## CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY -- a comma-separated list ## indicating which fields match the file names of the documents to be ## uploaded. ## The matching will be done in the same order as the list provided. CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY = reportnumber,recid ## CFG_BATCHUPLOADER_DAEMON_DIR -- directory where the batchuploader daemon ## will look for the subfolders metadata and document by default. ## If the path is relative, CFG_PREFIX will be joined as a prefix. CFG_BATCHUPLOADER_DAEMON_DIR = var/batchupload -## CFG_BATCHUPLOADER_WEB_ROBOT_AGENT -- Comma-separated list to specify the +## CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS -- Regular expression to specify the ## agents permitted when calling the batch uploader web interface ## cdsweb.cern.ch/batchuploader/robotupload, -## if using a curl, eg: curl xxx -A invenio_webupload -CFG_BATCHUPLOADER_WEB_ROBOT_AGENT = invenio_webupload +## e.g. if using curl: curl xxx -A invenio +CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS = invenio_webupload|Invenio-.* ## CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS -- access list specifying, for each ## IP address, which collections are allowed to use the batch uploader robot ## interface. CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = { + '127.0.0.1': ['*'], # useful for testing + '127.0.1.1': ['*'], # useful for testing '10.0.0.1': ['BOOK', 'REPORT'], # Example 1 '10.0.0.2': ['POETRY', 'PREPRINT'], # Example 2 } #################################### ## Part 18: BibCatalog parameters ## #################################### ## CFG_BIBCATALOG_SYSTEM -- set the desired catalog system. For example, RT. CFG_BIBCATALOG_SYSTEM =
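## For instance, to connect Invenio to a remote Request Tracker instance,
## one could set (illustrative values; see also the RT options below):
## CFG_BIBCATALOG_SYSTEM = RT
## CFG_BIBCATALOG_SYSTEM_RT_URL = http://rt.example.org/rt3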
## RT CONFIGURATION ## CFG_BIBCATALOG_SYSTEM_RT_CLI -- path to the RT CLI client CFG_BIBCATALOG_SYSTEM_RT_CLI = /usr/bin/rt ## CFG_BIBCATALOG_SYSTEM_RT_URL -- base URL of the remote RT system CFG_BIBCATALOG_SYSTEM_RT_URL = http://localhost/rt3 ## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER -- set the username for a default RT ## account on the remote system, with limited privileges, in order to only ## create and modify its own tickets. CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER = ## CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD -- set the password for the default RT ## account on the remote system. CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD = #################################### ## Part 19: BibFormat parameters ## #################################### ## CFG_BIBFORMAT_HIDDEN_TAGS -- comma-separated list of MARC tags that ## are not shown to users not having cataloging authorizations. CFG_BIBFORMAT_HIDDEN_TAGS = 595 ## CFG_BIBFORMAT_HIDDEN_FILE_FORMATS -- comma-separated list of file formats ## that are not shown explicitly to users not having cataloging authorizations, ## e.g. pdf;pdfa,xml CFG_BIBFORMAT_HIDDEN_FILE_FORMATS = ## CFG_BIBFORMAT_ADDTHIS_ID -- if you want to use the AddThis service from ## <http://www.addthis.com/>, set this value to the pubid parameter as ## provided by the service (e.g. ra-4ff80aae118f4dad), and add a call to the ## AddThis formatting element in your formats, for example ## Default_HTML_detailed.bft. CFG_BIBFORMAT_ADDTHIS_ID = ## CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS -- for each output ## format, BibReformat currently creates a cache for only one language ## (CFG_SITE_LANG) per record. This means that visitors having set a ## different language than CFG_SITE_LANG will be served an on-the-fly ## output using the language of their choice. You can disable this ## behaviour by specifying below for which output formats you would ## like to force the cache to be used whatever language is ## requested. If your format templates do not provide ## internationalization, you can optimize your site by setting ## e.g. hb,hd to always serve the precached output (if it exists) in ## CFG_SITE_LANG. CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS = #################################### ## Part 20: BibMatch parameters ## #################################### ## CFG_BIBMATCH_LOCAL_SLEEPTIME -- determines the number of seconds to sleep ## between search queries on the LOCAL system. CFG_BIBMATCH_LOCAL_SLEEPTIME = 0.0 ## CFG_BIBMATCH_REMOTE_SLEEPTIME -- determines the number of seconds to sleep ## between search queries on REMOTE systems. CFG_BIBMATCH_REMOTE_SLEEPTIME = 2.0 ## CFG_BIBMATCH_FUZZY_WORDLIMITS -- determines the number of words to extract ## from a certain field's value during fuzzy matching mode. Add/change fields ## and the appropriate numbers in the dictionary to configure this. CFG_BIBMATCH_FUZZY_WORDLIMITS = { '100__a': 2, '245__a': 4 } ## CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT -- determines the number of empty results ## to accept during fuzzy matching mode. CFG_BIBMATCH_FUZZY_EMPTY_RESULT_LIMIT = 1 ## CFG_BIBMATCH_QUERY_TEMPLATES -- here you can set the various predefined querystrings ## used to standardize common matching queries. By default the following templates ## are given: ## title - standard title search, taken from 245__a (default) ## title-author - title and author search (i.e. a combined title AND ## author query), taken from 245__a and 100__a ## reportnumber - reportnumber search (i.e. reportnumber:REP-NO-123). CFG_BIBMATCH_QUERY_TEMPLATES = { 'title' : '[title]', 'title-author' : '[title] [author]', 'reportnumber' : 'reportnumber:[reportnumber]' }
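## For instance, one could add a custom template combining title and report
## number (hypothetical template name; note that overriding this variable in
## invenio-local.conf replaces the whole dictionary, so repeat any default
## templates you still need):
## CFG_BIBMATCH_QUERY_TEMPLATES = { 'title' : '[title]',
##                                  'title-report' : '[title] reportnumber:[reportnumber]' }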
## CFG_BIBMATCH_MATCH_VALIDATION_RULESETS -- here you can define the various rulesets for ## validating search results obtained by BibMatch. Each ruleset contains a certain pattern mapped ## to a tuple defining a "matching-strategy". ## ## The rule-definitions must come in two parts: ## ## * The first part is a string containing a regular expression ## that is matched against the textmarc representation of each record. ## If a match is found, the final rule-set is updated with ## the given "sub rule-set", where identical tag rules are replaced. ## ## * The second part is a list of key->value mappings (dicts) that indicate specific ## strategy parameters with corresponding validation rules. ## ## Each strategy consists of five items: ## ## * MARC TAGS: ## These MARC tags represent the fields taken from the original record and any records from the ## search results. When several MARC tags are specified with a given match-strategy, all the fields ## associated with these tags are matched together (i.e. with the key "100__a,700__a", all 100__a ## and 700__a fields are matched together, which is useful when the first author can vary for ## certain records on different systems). ## ## * COMPARISON THRESHOLD: ## A value between 0.0 and 1.0 specifying the threshold for string matches ## to determine if it is a match or not (using normalized string-distance). ## Normally 0.8 (80% match) is considered to be a close match. ## ## * COMPARISON MODE: ## The comparison mode decides how the record datafields are compared: ## - 'strict' : all (sub-)fields are compared, and all must match. Order is significant. ## - 'normal' : all (sub-)fields are compared, and all must match. Order is ignored. ## - 'lazy' : all (sub-)fields are compared with each other and at least one must match ## - 'ignored': the tag is ignored in the match. Used to disable previously defined rules. ## ## * MATCHING MODE: ## The matching mode decides how the field values are matched: ## - 'title' : uses a method specialized for comparing titles, e.g. looking for subtitles ## - 'author' : uses a special authorname comparison. Will take initials into account. ## - 'identifier' : special matching for identifiers, stripping away punctuation ## - 'date': matches dates by extracting and comparing the year ## - 'normal': normal string comparison. ## Note: fields are considered matching when all their subfields or values match. ## ## * RESULT MODE: ## The result mode decides how the results from the comparisons are handled further: ## - 'normal' : a failed match will cause the validation to immediately exit as a failure. ## a successful match will cause the validation to continue on other rules (if any) ## - 'final' : a failed match will cause the validation to immediately exit as a failure. ## a successful match will cause validation to immediately exit as a success. ## - 'joker' : a failed match will cause the validation to continue on other rules (if any). ## a successful match will cause validation to immediately exit as a success. ## ## You can add your own rulesets in the dictionary below. The 'default' ruleset is always applied, ## and should therefore NOT be removed, but it can be changed. The tag-rules can also be overwritten ## by other rulesets. ## ## WARNING: Beware that the validation quality is only as good as the given rules, so matching ## results are never guaranteed to be accurate, as matching is very content-specific.
CFG_BIBMATCH_MATCH_VALIDATION_RULESETS = [('default', [{ 'tags' : '245__%,242__%', 'threshold' : 0.8, 'compare_mode' : 'lazy', 'match_mode' : 'title', 'result_mode' : 'normal' }, { 'tags' : '037__a,088__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'final' }, { 'tags' : '100__a,700__a', 'threshold' : 0.8, 'compare_mode' : 'normal', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '773__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'title', 'result_mode' : 'normal' }]), ('980__ \$\$a(THESIS|Thesis)', [{ 'tags' : '100__a', 'threshold' : 0.8, 'compare_mode' : 'strict', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '700__a,701__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'author', 'result_mode' : 'normal' }, { 'tags' : '100__a,700__a', 'threshold' : 0.8, 'compare_mode' : 'ignored', 'match_mode' : 'author', 'result_mode' : 'normal' }]), ('260__', [{ 'tags' : '260__c', 'threshold' : 0.8, 'compare_mode' : 'lazy', 'match_mode' : 'date', 'result_mode' : 'normal' }]), ('0247_', [{ 'tags' : '0247_a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'final' }]), ('020__', [{ 'tags' : '020__a', 'threshold' : 1.0, 'compare_mode' : 'lazy', 'match_mode' : 'identifier', 'result_mode' : 'joker' }]) ] ## CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT -- determines the minimum percentage of ## validation rules that must be positively matched when comparing two records. Should the ## number of matches be lower than the required matches but equal to or above this limit, ## the match will be considered fuzzy. CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT = 0.65 ## CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT -- determines the maximum number of search results ## a single search can return before being treated as a non-match. CFG_BIBMATCH_SEARCH_RESULT_MATCH_LIMIT = 15 ###################################### ## Part 21: BibAuthorID parameters ## ###################################### # CFG_BIBAUTHORID_MAX_PROCESSES is the maximum number of processes # that may be spawned by the disambiguation algorithm (see the tuning # example below). CFG_BIBAUTHORID_MAX_PROCESSES = 12 # CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS is the maximum number of threads # used to parallelize SQL queries during personID table updates. CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS = 12 # CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY defines the user info # keys for externally claimed records in a remote-login scenario, # e.g. "external_arxivids" for arXiv SSO. CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY = # CFG_BIBAUTHORID_ENABLED # Globally enable the AuthorID interfaces. # If False: no guest, user or operator will have access to the system. CFG_BIBAUTHORID_ENABLED = True # CFG_BIBAUTHORID_ON_AUTHORPAGES # Enable AuthorID information on the author pages. CFG_BIBAUTHORID_ON_AUTHORPAGES = True # CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL defines the email address # to which all ticket requests concerning authors will be sent. CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL = info@invenio-software.org
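# For instance, on a machine with few cores one could lower the parallelism
# of the disambiguation options above (illustrative values):
# CFG_BIBAUTHORID_MAX_PROCESSES = 4
# CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS = 4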
# CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE defines whether the optional arXiv stub page is skipped. CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = False ######################################### ## Part 22: BibCirculation parameters ## ######################################### ## CFG_BIBCIRCULATION_ITEM_STATUS_OPTIONAL -- comma-separated list of statuses. # Example: missing, order delayed, not published # You can always add a new status here, but you may want to run some script # to update the database if you remove some statuses. CFG_BIBCIRCULATION_ITEM_STATUS_OPTIONAL = ## Here you can edit the text of the statuses that have specific roles. # You should run a script to update the database if you change them after having # used the module for some time. ## Item statuses # The book is on loan CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN = on loan # Available for loan CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF = on shelf # The book is being processed by the library (cataloguing, etc.) CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS = in process # The book has been ordered (bought) CFG_BIBCIRCULATION_ITEM_STATUS_ON_ORDER = on order # The order of the book has been cancelled CFG_BIBCIRCULATION_ITEM_STATUS_CANCELLED = cancelled # The order of the book has not arrived yet CFG_BIBCIRCULATION_ITEM_STATUS_NOT_ARRIVED = not arrived # The order of the book has not arrived yet and has been claimed CFG_BIBCIRCULATION_ITEM_STATUS_CLAIMED = claimed ## Loan statuses # This status should not be confused with CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN. # If the item status is CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, then there is # a loan with status CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN or # CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED. # For each copy, there can only be one active loan ('on loan' or 'expired') at # a time, while there can be many 'returned' loans for the same copy. CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN = on loan # The due date has come and the item has not been returned CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED = expired # The item has been returned. CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED = returned ## Request statuses # There is at least one copy available, and this is the oldest request. CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING = pending # There are no copies available, or there is another request with more priority. CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING = waiting # The request has become a loan CFG_BIBCIRCULATION_REQUEST_STATUS_DONE = done # The request has been cancelled CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED = cancelled # ILL request statuses CFG_BIBCIRCULATION_ILL_STATUS_NEW = new CFG_BIBCIRCULATION_ILL_STATUS_REQUESTED = requested CFG_BIBCIRCULATION_ILL_STATUS_ON_LOAN = on loan CFG_BIBCIRCULATION_ILL_STATUS_RETURNED = returned CFG_BIBCIRCULATION_ILL_STATUS_CANCELLED = cancelled CFG_BIBCIRCULATION_ILL_STATUS_RECEIVED = received # Acquisition statuses CFG_BIBCIRCULATION_ACQ_STATUS_NEW = new CFG_BIBCIRCULATION_ACQ_STATUS_ON_ORDER = on order CFG_BIBCIRCULATION_ACQ_STATUS_PARTIAL_RECEIPT = partial receipt CFG_BIBCIRCULATION_ACQ_STATUS_RECEIVED = received CFG_BIBCIRCULATION_ACQ_STATUS_CANCELLED = cancelled ## Library types # Normal library where you have your books. It can also be a depot. CFG_BIBCIRCULATION_LIBRARY_TYPE_INTERNAL = internal # External libraries for ILL. CFG_BIBCIRCULATION_LIBRARY_TYPE_EXTERNAL = external # The main library is also an internal library.
# Since you may have several depots or small sites, you can tag one of them as # the main site. CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN = main # It is also an internal library. The copies in this type of library will NOT # be displayed to borrowers. Use this for depots. CFG_BIBCIRCULATION_LIBRARY_TYPE_HIDDEN = hidden ## Amazon access key. You will need your own key. # Example: 1T6P5M3ZDMW9AWJ212R2 CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY = ###################################### ## Part 23: BibClassify parameters ## ###################################### # CFG_BIBCLASSIFY_WEB_MAXKW -- maximum number of keywords to display # on the Keywords tab web page. CFG_BIBCLASSIFY_WEB_MAXKW = 100 ######################################## ## Part 24: Plotextractor parameters ## ######################################## ## CFG_PLOTEXTRACTOR_SOURCE_BASE_URL -- for acquiring source tarballs for plot ## extraction, where should we look? If nothing is set, we'll just go ## to arXiv, but this can be a filesystem location, too. CFG_PLOTEXTRACTOR_SOURCE_BASE_URL = http://arxiv.org/ ## CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER -- for acquiring source tarballs for plot ## extraction, the subfolder where the tarballs sit. CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER = e-print/ ## CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER -- for acquiring source tarballs for plot ## extraction, the subfolder where the PDFs sit. CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER = pdf/ ## CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT -- a float representing the number of seconds ## to wait between each download of a pdf and/or tarball from the source URL. CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT = 2.0 ## CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT -- when extracting the context of plots from ## TeX sources, this is the limit on the number of characters to extract in each ## direction. Default 750. CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT = 750 ## CFG_PLOTEXTRACTOR_DISALLOWED_TEX -- when extracting the context of plots from TeX ## sources, this is the list of TeX tags that will trigger 'end of context'. CFG_PLOTEXTRACTOR_DISALLOWED_TEX = begin,end,section,includegraphics,caption,acknowledgements ## CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT -- when extracting the context of plots from ## TeX sources, this is the limit on the number of words in each direction. Default 75. CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT = 75 ## CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT -- when extracting the context of plots from ## TeX sources, this is the limit on the number of sentences in each direction. Default 2. CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT = 2 ###################################### ## Part 25: WebStat parameters ## ###################################### # CFG_WEBSTAT_BIBCIRCULATION_START_YEAR defines the start date of the BibCirculation # statistics. The value should have the format 'yyyy'. If empty, all existing data is taken. CFG_WEBSTAT_BIBCIRCULATION_START_YEAR = +###################################### +## Part 26: Web API Key parameters ## +###################################### + +# CFG_WEB_API_KEY_ALLOWED_URL defines the web apps that are going to use the web +# API key. Each entry has three values: the name of the web app (a regular +# expression matching its URL), the time of life for the secure URL, and +# whether a time stamp is needed. +#CFG_WEB_API_KEY_ALLOWED_URL = [('search/\?', 3600, True), +# ('rss', 0, False)] +CFG_WEB_API_KEY_ALLOWED_URL = [] + #################################### ## Part 27: BibSort parameters ## #################################### ## CFG_BIBSORT_BUCKETS -- the number of buckets bibsort should use. ## If 0, then no buckets will be used (bibsort will be inactive).
## If different from 0, bibsort will be used for sorting the records. ## The number of buckets should be set with regard to the size ## of the repository; having a larger number of buckets will increase ## the sorting performance for the top results but will decrease ## the performance for sorting the middle results. ## We recommend using 1 in case you have fewer than about ## 1,000,000 records. ## When modifying this variable, re-run rebalancing for all the bibsort ## methods in order to keep the database in sync. CFG_BIBSORT_BUCKETS = 1 ########################## ## THAT's ALL, FOLKS! ## ########################## diff --git a/modules/bibauthorid/etc/name_authority_files/male_firstnames.txt b/modules/bibauthorid/etc/name_authority_files/male_firstnames.txt index 415ecc1f5..0d8bba894 100644 --- a/modules/bibauthorid/etc/name_authority_files/male_firstnames.txt +++ b/modules/bibauthorid/etc/name_authority_files/male_firstnames.txt @@ -1,5787 +1,5786 @@ aakarshan aaron abaddon abba abban abbas abbott abdel abdiel abdieso abdul abdullah abe abejundio abel abelard abenzio abercio abernethy abhay abhi abhijit abhinav abhishek abhorson abi abie abiel abijah abir abisha abner abraham abram abrasha absalom abudemio abundiantus acacio acario accursius ace acelin achilles achyuta ackerley ackley acton acuzio adair adalardo adalberto adalgiso adalrico adam adan adar addai addison addo adeben adeipho adel adelais adelard adelbert adelfried adelino adelmo adelphos adem ademaro aden adeodatus adhamh adin adir adiran aditya adlai adler adley admes admon adnan adney adolfo adolph adon adoni adonis adony adrastos adrian adriano adriel aegeon aemilius aeneas aeolus aeron aetos afro agabo agamemnon agatone agilard agosto agrippa agu agustin ahab ahearn ahmad ahmed ahmik ahren aidan aiden aiken ailen aimery aimon aindreas ainsley aitan ajani ajatashatru ajax ajayi ajit ajitabh akaash akama akando akbar akello akil akim akira akiva akiyama akram aksel akshay aladdin alan aland alarbus alard alaric alasdair alastair alban alber alberich albern albert alberto albin albion alcander alcibiades alcott alden alder alderney aldo aldous aldred aldrich aldridge aldwin aldwyn alec aled alejandro aleron aleser alessandro alessio alex alexander alexas alexis alfons alfonso alfonzo alford alfred alfredo alger algernon algren ali alika alim alison alistair alister allambee allan allard allen alleyne allighiero allister almo almon alok alonso alonzo aloysius alperen alphonse alphonso alric alroy alston altair altman alton alun alva alvah alvaro alvin alvis alwan alwin alwyn amadeus amadi amado amador amal amar amaro amaroo amato ambar ambert ambler ambrose amery amherst amida amiel amiens amin amir amiri amirov amistad amit amitabh amitava ammon amol amon amory amos amrit amsden amul amulya amund anand anant anastasius anatole anay ancel anders anderson andre andras andrea andreas andrej andres andreus andrew androcles andy aneurin ang angada angel angelo angus angwyn anibal anieli anil anirudhh anish aniston anjuman anker ankit ankur annan anniruddha anno anoke anoki anoop ansari anscom ansel anselm ansgar anshul anshuman ansley anson anstice antal anthony antigonus antiochus antione antipholus antoine anton antone antoni antonia antonio antony antwan anu anwar anwell anyon apache apari apemantus apollo apostolos aquila ara araluen aram aran arana aras archard archer archibald archidamus archie arda arden ardley ardolph ardon aren arend ares argus argyll ari arian aric aricin ariel ariki arion aristedes aristo aristotle
arizona arje arjen arjun arkin arkwright arland arlen arley arlie arlo armand armande armando armen armin armon armstrong arnall arnaud arne arnett arnie arno arnold arnoldo arnon arnulfo aron aronne aroon arpiar arran arron arsen arsenio art arthur arthus artie arturo arty aruiragus arun arundel arunta arvad arval arvid arvin arvind arviragus arwin asa ascot aseem ash ashburn ashby asher ashford ashish ashley ashliegh ashlin ashok ashon ashraf ashton ashur ashutosh ashwin ashwini asija asim askel aslak asparouh astin aston asuman asvathama asvin aswad aswin asztrik atalik atarah atawn athan atharvan athelstan atherol atherton athol athos atilla atiu atlas atley atmajyoti atman atrayl atrus attila attis atul atulya atwater atwell atworth auberon aubert aubin aubrey auburn audric audun august augustine augustus aurek aurelio aurelius austell austen austin autolucus avan avel avenall averell averill avery avinash aviv avner avon avram avrom axel axton ayer ayhner aylward aylwin ayush azad azariah azi azim aziz azriel azzan babar babul bacchus bae bahar bailey baingana baird bairn bajnok bakari baker balavan balbo balder baldric baldwin bale balfour bali balint ballard balram balthasar balthazar balun bancroft bandele bane banjora bankim banning banquo bansi baptista barabas barak baran barber barclay barden bardo bardolph bardon barega barend bari barlow barna barnabas barnard barnardine barnardo barnet barnett barney barnum barny baron barrett barry bars barse bart barth bartholomew barton baruch barwon bashir basil basim bassanio bassett bastiaan bastian bastien bates batuhan baudouin baul bavol baxter bay bayanai bayard bayley bazyli beacan beacher beagan beaman beardsley beathan beattie beau beaumont beauregard bebe becan beck becse bede bedrich bela belarius beldon belen bellamy bem bemus ben bence bendix benedick benedict benedikt benito benjamin benjy bennet bennett bennie benno benny benson bentley benton benvolio berat berenger berg bergen berger bergren beriszl berk berkay berkeley berkly bernard bernardo bernie berny berowne berrigan berry bersh bert berthold bertie bertram berwyn besnik beval bevan beverly bevis beyers bhagirath bharat bhaskar bhavesh bhavin bhavya bhim bhima bhishma bhrigu bhudev bhupen bhupendra bhuvan bialy bilal bill billie billy bing bingham binyamin biondello birch bishop bitalo bjarne bjorn björn blade blaine blair blaise blake blakeley blaxland blaxton blaz bledig blythe boa boaz bob bobbie bobby bod boden bodo bodor bogart bogdan bolton bonamy bonar bond bonner booker boone booth borachio borden borg boris bosley boswell bosworth botan botond bottom boult bourke bowen bowie bowman boyce boyd boyden boyet brabantio brad braden bradford bradley bradly bradwell brady brae brahnan brain bram bramwell bran branch brand brandeis branden brander brandon branko brant braxton bray brayden braz brazil brecon brencis brendan brendon brennan brent brenton breok bret breton brett brewster briac brian briand brice brigham brighton brij brijesh brinley brishen bristol britt brock broderick brodie brodny brody brom bromley bronson brook brooke brooks broughton brown bruce bruno brutus bryan bryant bryce brychan bryn brynmor bryon buck buckley bud buddy budi buford burak burchard burdon burgess burhan burian burke burl burle burley burnaby burnard burne burnell burnet burnu burnum burr burt burton busby buster butler byford byng byron cable cadby cadell cadeo cadman cadmus cadogan caedmon caerwyn caesar cahil cailan cailean cain caine caithness caius cajan cal calder 
caldwell caleb caley calhoun caliban calisto calixto callis calum calvert calvin calvine cam camden cameron camille camillo camlin campbell can candan candidius cane canice cannon canute capers caphis capucius capulet caradoc carden cardew carel carey carl carleton carlin carlisle carlo carlos carlton carlyle carlyon carmelo carmen carmine carne carnelian carol carr carrick carrington carrol carroll carson carsten carsyn carter carvell carver carwyn cary case casey casimir caspar casper cassidy cassio cassius castel cathal cathan cathmor cato caton cavan cayden ceasar cecil cedric cedrick cem cemal cengis cerimon cesar chad chadwick chahaya chaika chaim chal chale challis chalmers chaman chan chance chancellor chancey chandan chander chandler chandra chane chaney chang channing chapal chapin chapman charan charles charleton charley charlie charlton chas chase chata chatillon chaucer chauncey chayton chen cheney cherokee chester chet chetan chetwin chevalier chevy cheyenne cheyney chi chiamaka chico chike chilton chin chinmay chintu chiranjeev chiron chong chrirag chris christian christie christmas christoper christoph christophe christof christopher christos christy chrysander chuck chuckie chung churchill cian ciaran cicero ciceron ciel ciprian ciprien ciro clachas clair clancy clarence clarenzio clark clarke claud claude claudio claudius claus clay clayland clayton cleary cledwyn clem clemens clement clemente cleo cleon cletus cleve cleveland cliff clifford clifton clint clinton clitus clive cloten clovis clunes clyde coalan cobar cobden cobweb cody cohn colbert colby cole coleman colin collin colman colon coltin colton columba columbus coman comfort cominius compton conall conan condon conlan conley conn conner connie connley connor conor conrad conrade conroy constantine conway cooper coorain corban corbett corcoran cordell corey corin cormac cornelian cornelius cornell cort cortez corvin corwin cory cosmo costard coty court courtland courtney coy craig crandon cranley cranmer cranog crawford creighton crewe crisiant crispin cristian cristiano cristo cristobal cristopher cromwell cronan crosby cruz csaba cseke csenger csepel csombor csongor ctirad cuba cubert cullen cupid curan curio curnow curran currier curry curt curtis cuthbert cutler cutter cymbeline cynfor cynric cyprian cyrano cyric cyril cyrus dabert dacey dade dafydd dag dagan dagobert dahana dai dail dakarai dakota dakshesh daku dalbert dale daley dallas dallin dalton daly dalziel damek damen damian damien damion damodar damon dan dana danby dane danial daniel danilo danior dannie danny dante dara darby darcy darel darell daren darian darien darin dario darius darnell daron darrel darrell darren darrick darrin darron darryl darshan dartagnan darthmouth darton daruka darwin daryl dattatreya dave davey david davidson davie davin davis davy dawa dayton deacon dean deandre deangelo decker declan decretas dedric dedrick dee deepak deiphobus dejan del delaney delano delbert deli dell delling delmar delmer delmore delroy delsin delwyn deman demarcus demas demetrius demian demitrius demos dempe dempsey dempster demyan denby denes denham denholm denis deniz denley dennis dennison denny denton denver denzel denzil deo deon derain derby derek derex derick dermot derrell derren derrick derron derry derward derwent derwin derwood derwyn des deshawn desiderio desiderius desmond dev devang devante devarsi devdan deverell devereux devesh devi devin devitri devlin devon devrity dewayne dewey dewi dewitt dexter dhananjay dharma dharmavira 
dharmendra dharmesh dharuna dhatri dhaval dheran dhruv diamond dian diarmad diarmid dick dickie dickinson dickson dicky didier diederik diego dieter dietrich digby diggory dilip dillon dimitri dinesh dinkar dino dinsdale diomedes dion dione dionysus dirk dirke divyesh dixon djavan dmitri dobry dodd dogberry dolabella dolan dolf dolph domenic domingo dominic dominick dominique domokos don donahue donalbain donald donatien donato donegal dong donn donnell donnelly donnie donny donoghue donohue donovan donte dooley dorak doran dorian dorjee doron dorset dorsey dory doug dougal douglas douglass dov dover dow doyle dragan drake drew driscoll dromio drostan druce drury dryden drystan duane duarte dudley duff dugal dugald dugan duglas duke dull dumaine duman dunbar duncan dunham dunley dunmore dunn dunstan dural durand duranjaya durant durdanius durham durjaya durmada durriken durward durwin dusan dustin dusty dutch dvimidha dwain dwaine dwane dwayne dwennon dwight dyami dyfan dylan dymas dyre eachan eamnonn eamon earl earle earnest earvin eaton eban ebenezer eberhard ebrahim ed edan edbert eddie eddy eden edgar edgardo edison edlin edmond edmund edmundo edolf edom edric edsel eduard eduardo edward edwardo edwin efe efrain efram efrem efren egan egbert egerton egeus egil eglamour egmont egon egor egyed ehner ehno ehnore ehren ehud eideard eike einar eirik eisig ekachakra eknath eladio elan eland elbert elbow elden eldin eldon eldred eldric eldrich eldridge eldwin eleazar elek elezar elgan elgar eli elia elian elias elijah eliot eliseo elisha elkan ellar ellard ellery elliot elliott ellis ellison ellsworth elmar elmer elmo elner eloy elroy elsdon elston elton elu elvin elvis elvy elward elwin elwood ely emanuel emerson emery emil emile emilian emilio emin emir emirhan emlen emlyn emmanuel emmet emmett emmitt emory emre emrey emrick emrys emyr enda endymion eneas enes engelbert ennis enno ennor enoch enos enrico enrique ensar enzo eoin ephraim eraldo erasmo erasmus erastus eray ercole erebus erek eren erhard eric erich erick erik erin erith erland ermanno ernest ernesto ernie eros errol erroll erskine eruera ervin erwin eryx esau esbern escalus escanes esidor esmond esra essex essien este esteban estes ethan ethelred etienne eton ettore etzel euan eudor eugene eugenio eumann euridice eurwyn eusebio eustace evan evander evangelos evelyn everard everet everett everette everild everley evzen ewald ewan ewart ewing eyan eydie eymen ezekiel ezequiel ezio ezra faber fabian fabien fabio fabrice fabron fadil fagan fairfax fairley faisal falgun falk falkner fallon fane faraji farand farid fariel farkas farley farman farnell farnley farook farquhar farrar farrell farriss faulkner faust faustino fausto favian faxon faysal fear februus fedele federico felipe felix felton fenn fenton fenwick feodore ferdinand ferenc fergal fergus ferguson fermin fernando fernleigh fernley feroz ferran ferrand ferrer ferris feste festus ffionn fiachra fidel fielding fiete fife figaro filbert filiberto filip filippo finbar fineas fingal finian finlay finley finn finnegan fionn firdos firmin firth fisk fitch fitz fitzgerald fitzhugh fitzjames fitzpatrick fitzroy fjodor flannan flannery flavian flavius fleance fleeting fleming fletcher flint floke florencio florentino florian floritzel floyd fluellen flynn fodor folant folkus fonz fonzie forbes ford forest forester forrest forrester forster fortescue fortinbras fortunato foster frances francesco francis francisco frank frankie franklin franklyn frans franz fraser frasier frayne 
fred freddie freddy frederic frederick fredric fredrick free freeman fremont friedrich frigyes frith fritz froth fudo fujita fulbright fuller fulton furkan furnell fyfe fynn fyodor gabai gable gabor gabriel gadiel gadil gafna gagan gage gail gair gaius galahad gale galen galeno galip gallagher gallard galloway gallus galor galton galvin galway gamal gamaliel gaman gamba gamble gamel ganan gandolf ganesh gannon ganymede gara gardiner gardner gareth garett garfield garin garland garman garmond garner garnet garret garrett garrick garridan garrison garron garry garth garton garvey garvin garwood gary garyson gaspar gassy gaston gaurav gautam gautama gautier gavan gavin gavrie gavril gawain gayle gaylord geary gedeon geert geet geir gelar genaro gene genesis geoff geoffrey geordi geordie georg george georgios gerad geraint gerald geraldo gerard gerardo gerik germain german gerome geronimo gerrit gerry gershom gervase gerwyn gerzson gethin ghassan giacobbe giacomo gian gianluca gianni giasone gibson gideon gifford gil gilbert gilberto gilby gilchrist giles gilford gillespie gillet gilmer gilroy gino ginton giordano giovanni giraldo girish girra girvan giuliano giulio giuseppe givon gladstone gladwin glanville glen glendon glenn glover glyn glynn goddard godfrey godwin golding goliath gomer gomez gonzalo gopal gordon gordy gore gorman goronwy gorran gottfried gough govinda gower grady graeme graham graig gram grandpro granger grant grantham granville gratian gratiano grayson greg greger gregg gregor gregorio gregory gremio gresham greville griffin griffith griswold grosvenor grover grumio gryffyn guadalupe gugliehno guiderius guido guildenstern guildford guillaume guillermo gul gulab gunnar gunther guntur gurion gurkan gus gustav gustavo guthrie guy gwilym gwyn gwynfor gye gyles gyula habib hackett hadar hadden haddon hadi hadley hadrian hadwin hafiz hagan hagen hagley hahn hai haig haile haines hakan hakim hakon hal halbert halden haldor hale haley halford halian halifax halil hall hallam halse halsey halstead halsten halton halvard hamal hamar hamid hamilton hamish hamlet hamlin hamon hampton hamza hanan handel hanford hani hank hanke hanley hannes hannibal hanno hannu hans hansel hansi hanson haral harald harcourt harden hardik harding hardwin hardy hare haresh harford hargreave hari harish harith harlan harland harleigh harley harlow harman harold haroun harper harris harrison harry harsh hart hartley hartman hartwell hartwin hartwood haru harun harvey harwin harwood hasad hasan hasim haslett hassan hastin hastings havelock haven havika hawley hayden hayes hayward haywood hazlett hazlitt hearst heath heathcliff heathcote hecate hector heddwyn hedley heilyn heinrich heinz helaku helenus helicanus heller helmut helmuth hemal hemang hemant hemendra hemi henderson hendra hendrik hendy henleigh henley hennes henning henri henrik henry herb herbert hercules heriberto herman hermes hermon hernando herold herrick herschel hershel herst hertford herve hervey herwin hesketh hew hewett hewie hewston heyward hiatt hieronymus hilario hilary hildebrand hillel hilton hipolito hippolyte hiram hiroshi hirsh hiten hitendra hitesh hobart hobert hogan holbrook holden holger holgernes hollis holman holmes holofernes holt homer hong hont hopkin horace horacio horatio hori horst hortensio hortensius horton hosea houghton houston howard howe howell howie hoyt hridayesh hrishikesh hsin huatare huba hubert hudson huey hugh hugo humbert humberto hume humphrey hung hunor hunter huntley huon hurst hussain hussein hutton 
huw huxley hyam hyatt hyde hylton hyman hymen hyram hywel iaap iachima iagan iago iain ian ibeamaka ibrahim icarus ichabod iden idris idwal iestin iestyn ieuan ifor ignacio ignatius igor ihorangi ike ikey ilar ilario ilhan ilias ilie ilja ilya imam immanuel imre ince indra indro ingemar inger inglebert ingmar ingo ingolf ingram inigo innes innocent ioannes iolo iolyn ion iorweth ira iravan irawaru irvin irving irwin isa isaac isaiah isaias isha ishmael ishver isiah isidore isidro ismael ismail israel isreal issac istvan itzaak itzak itziamar itzik ivan ivar ives ivo ivor ivory izaak jabari jabez jabir jace jacek jacinto jack jackie jackson jacob jacques jacy jaden jae jaedon jael jafar jagdish jagger jago jaiden jaidev jaidon jaime jake jakob jakub jal jaleel jalen jalil jamaal jamal jamar jame jamel james jamey jamie jamieson jamison jan janak janardan janne jannes jano janosch janus japhet jaques jared jarek jarel jarlath jarman jarne jarno jaro jarod jaron jaroslav jarrad jarrah jarratt jarred jarrett jarrod jarvis jaryn jason jasper jatin java javan javed javier jay jayant jayden jayson jaysukh jayvyn jean jed jedd jedediah jedidiah jedrek jeevan jeff jefferey jefferson jeffery jeffrey jeffry jehosophat jelani jendrik jenkins jens jensen jerald jeramy jerara jere jered jeremiah jeremias jeremy jeri jericho jermain jermaine jermyn jerod jerold jerolin jerome jeromy jerrard jerrell jerrie jerrod jerrold jerry jervaise jervis jerzy jesper jess jesse jessie jesus jet jethro jetmir jewel jewell jibril jiger jilesh jim jimmie jimmy jimuta jin jinesh jiri jiro jirra jiten jitender jitendra jivana jivin joab joachim joakim joan joaquin job jocelin jock jody joe joel joesph joey joh johann johannes john johnathan johnathon johnie johnnie johnny johnson jolyon jon jonah jonas jonathan jonathon jonny jonte joost joram jordan jordon jorge jorgen joris jorma jory jose josef joseph josh joshua josiah jospeh joss josue jourdain jourdan jove jowan jozef jozsef juan judah judd jude judson jules julian julien julio julius juma jung junior junius jurgen juri justin justus jyotis kaan kabir kabos kada kadin kadir kadosa kahn kahoku kai kaikara kailash kain kalani kalb kalden kale kaleb kaleo kalid kalidas kalil kalkin kalle kalman kalpanath kalti kama kamadev kamal kami kamil kamlesh kanak kanan kanaye kane kaniel kano kapil kaplony kapolcs karan kardal kardos kareem karel kari karim karl karma karol karsa karsten kartal kartik kartikeya kasch kasen kasey kasim kasimir kaspar kasper kateb kathel kauri kaushal kaushik kavan kavi kay kayin kayne kazimir kean keane keanu kearney keary keaton kedar keefe keegan keeland keeley keenan keeran kees kegan keir keiran keith kelan kelby keled keleman kell kellen keller kelley kelly kelsey kelso kelt kelvin kemal kembell kemble kemenes kemp ken kenan kendall kende kendrick kenelm keneth kenley kenn kennard kennedy kenneth kennith kenny keno kenrich kenrick kent kenton kenver kenward kenwyn kenya kenyon keon keona keoni ker kerby kerecsen kerem kereteki kerim kermit kern kernick kernow kerr kerrin kerry kers kersen kerwin keshav kester ketan keve keven keverne kevin keyon khairi khalid khalif khalil khorshed khortdad khoury kian kiefer kieran kieth kilian killara killian kim kimball kimberley kimi kin king kinga kingsley kingston kinnard kinnel kinsey kintan kip kipling kipp kiran kirby kirill kirit kirk kirkley kirkwood kiron kirwin kisho kishore kit kito kitto kiva kiyoshi kjell klaas klaes klaud klaus klea klemens kliment knox knut kolet kolja kolos kolya konan 
konol konrad konstantin kont kontar koora koorong koray korbinian korey kornel korvin kory kosmo kostya kovan kozma kraig kripa kris krischnan krishna krispen krispin kristen kristian kristofer kristoffer kristopher krunal kulan kuldeep kulvir kumar kunal kund kupe kurt kurtis kuruk kusagra kush kushan kwan kyle kyler kynan kyne kyran laban label laborc lachlan lacy ladd ladislav ladomir lae laertes lafayette lafeu lai laibrook laidley laird lakota lakshman lakshya lalit lam laman lamar lambert lamberto lamech lamont lance lancelot lander landers landon lane lang langford langley langston langworth lani lann lanny lantos lanyon laoghaire laris larrie larry lars larson lartius lascelles lasse laszlo latham latif latimer lauchlan laughlin launce launcelot lauren laurence laurie laurin lavache lavern laverne lawerence lawford lawler lawley lawrance lawrence lawson lawton laxman layland layton lazaro lazarus leal lean leander leandro lear lebeau lech lee lehel leif leigh leighton leith lel leland lemuel len lenard lennie lennon lennox lenny lensar leo leon leonard leonardo leonas leonato leonel leonhard leonidas leonine leontes leopold leopoldo leron leroy les leshem lesley leslie lesta lester lev levent levente leverett leverton levi levin lewie lewis lex leyman liall liam lian lias lief lincoln lind lindan lindberg lindell linden lindley lindsay lindsey linford linley lino linton linus linwood lio lion lionel lipet lisle litton livingston ljluka llewellyn llfryn lloyd lobsang loch locke lockwood lodovico logan lokesh loman lombard lome lon london long longaville lonnie lonny lorand lorant loren lorenz lorenzo lorik lorimer lorin lorinc loring loris lorne lothair lothar lothario lou loughlin louie louis lovel lovell lowan lowell loxley loyal loyd luan luc luca lucas lucentio lucian luciano lucien lucilius lucio lucius lucretius lucullus ludlow ludovic ludvig ludwig ludwik luigi luis luka lukas luke lundy lunt lupe luther lutz luzio lykaios lyle lyman lymoges lyn lyndell lyndon lynn lynwood lyonel lyre lysander lysimachus lytton maaka maarten mablevi mabon mac macarius macbeth macdonald macduff mace macey mack mackay mackenzie macmorris macon macy madan maddock maddox madhav madhusudhana madison madoc madron magee magne magnus magus mahabala mahavira mahendra mahesh mahir mahmood mahomet mahon mailo maitland maitreya majid major maka makani makepeace makis mako maksim makya mal malachi malcolm malcom malden malik malin malise mallee mallory malone malte malvern malvolio mamillius mamoru man manavendra manchu manco mandek mandel mander mandhatri mandu manfield manfred manfried mani manik manish manley mannie manning mannix manny manoj mansa mansoor mansukh mansur manu manual manuel manus marama marc marcade marcel marcelino marcell marcello marcellus marcelo marcin marco marcos marcus marden mardian marek margarelon margarito -maria marian mariano marijan marino mario marion marius mark markandeya markus marland marley marlin marlo marlon marlow marmaduke marmion maron marot marquis marron marsden marsh marshall marston marten martin martius marty marvin marvyn mary masa masakazu maslin mason massimo masud matai matanga matareka matari mather mathew matt matteo mattes matthew matthias matti maui maurice mauricio maurilio mauro mawgan max maxey maxim maximilian maximo maxwell mayer maynard mayon mckinley mead meara mearann mecaenus medord medwin megyer mehetabel mehmet mehul meir meirion meka mel melancton melbourne melburn melchior melford melik melor melrose melun melville melvin melvyn 
menachem menadue menas mendel menecrates menelaus menenius menteith menyhirt mercade mercer mercutio meredith merle merlin merrick merrill merryn mert merten merton merv mervin mervyn messala mete meyer mica micah michael michal michale micheal michel michelangelo mick mickey midas miguel mihaly mihir mika mikael mikail mike mikel mikhail miki mikkel mikkeli mikko miklos miko miksa milan milbourn milburn miles milford milind millard miller milo milos milow milton minar minas miner minesh ming mingma minh minos mio miquel mirac miran miro miroslav mischa mitali mitch mitcham mitchel mitchell mitesh mladen modesto moffatt mog mogens mogo mohamed mohammad mohammed mohan mohin mohinder mohit moises molloy monro monroe montague montana montano monte montego montgomery monti montmorency monty mopsa mor moray morcum mordecai mordred morgan moriarty morice moritz morland morley morrell morrie morris morrison morry morse mort morten mortimer morton morty morven mose moses moshe mosi moss mostyn motega moth mountjoy mowan mubarak muhammad muir mukasa mukhtar mukta mukul mukunda mulga mull mullion mumtaz mungo munro murdoch murdock murphy murray musa mustafa mustardseed mutius muzaffer myall myer myles mylo mylor myron naaman nabendu nabil nabulung nachiketa nachmanke nadav nadir naeem nahum naimish nairn nakul nalong nalren nambur namdev namid namir nanda nandin nantan napoleon narayan narayana narciso narcissus nardu narendra naresh narrah narsi nartana nash nasir nassir nat natal natale natan nathan nathanael nathanial nathaniel naum naveen nawang nayan neal neale ned neddie neddy nedim neel neeraj nehemiah neil neill nek nelek nelson nemo neo nerang nerhim nero neron nesim nesip nestor nevada nevan neven neville nevin nevio newbold newell newlyn newman newton niall nic nicholas nick nickie nickolas nickson nicky nico nicodemus nicol nicolas nieander niel niels nieodemus nigel nihar nike nikhil nikita niklaus nikola nikunj nilay nils nilson nima nimai nimbus nimesh nimrod ninian nino nioka niraj niramitra niran niranjan nirav nirel nishad nishan nisi nitesh niven nivens nixon noadiah noah noam noble nodin noe noel noi nolan norbert norberto norbu norm norman normand norris north northclif northrop norton norville norvin norwell norwood nowell nowra numbers nuncio nur nurhan nuri nye nyle nym nyok oakes oakley obadiah obelix oberon obert obiajulu ochen octavio octavius oddvar odell oderico odern odil odin odion odis odolf odon odysseus ogden ogilvie ogilvy oglesby oguz okan okely okko olaf olcay oldrich ole oleg olen oleos olier olin oliver olivier ollie omar omarjeet omer ompoly ond onslow onur ora orad oram oran orazio orban ordway orelious oren orestes orfeo orford orion orlan orlando orman ormond ormos oroiti orpheus orran orren orrin orsen orsino orson orton orval orville orvin osaze osbert osborn osborne oscar osgood osip oskar oskari osman osmar osmond osric ossian osvaldo oswald oswaldo oswin otello otha othello otis ottavio otto otway ove ovid owen oxford oxton ozan ozaner ozor ozsvot paavo pablo pacifico packard paco paddy padget padgett padmakar padraig padruig page pahniro paige paine paki palani pallav palmer palti pan pancho pancras pancrazio pandarus pandita pandya pankaj panos panthino panyin paolo parag paramartha paras paris park parker parkin parlan parolles parr parri parrish parry parsefal parsifal parth partha pasang pascal pasquale pat patamon patern patony patricia patrick patroclus patton paul paulo pavel pax paxton payton peace pearce peder pedr pedrek pedro pedrog peer 
pelin pell pellegrin pelton pemba pembroke penley penn penrice penrith penrod penrose pentele penwyn pepe pepin pepper per percival percy peregrine pericles peril perri perry perth pete peter petroc petruchio peverall phelan phelps phil philario philbert philemon philip phillip phillips philo philostrate philotus phineas phoenix phuoc phuong picerious pickford pierce piero pierpont pierre pierrot piers pierson piet pietro pilan pinch pindan pindari pindarus pino pip piran pisanio pistol pita pitney pitt pius piusz piyush placido plato platon platt pluto pol polixenes pollock polonius pomeroy pompey pongor pontius porfirio porter powell powys prabhakar prabodh pradeep praful prakash pramana pramath pramsu pranav pranay prasad prasanth prasata prashant prasoon prassana pratap pratik pratyush praveen prayag preetish prem prentice prescott presley preston prewitt priam price primel primo prince prior prithu privrata probert proctor proculeius prometheus prosper prospero proteus pryce pryderi pryor ptolemy publius puck pulkit pundarik puranjay purujit purvis pusan puskara putnam qadir qasim quasim quemby quennel quentin quigley quillan quilliam quimby quinby quince quincy quinlan quinn quinney quintin quinton quintus quirce quirin quirino rab rabbie rad radborne radcliff radcliffe radek radford radley radman radnor radomir raeburn rafael rafe raffaele rafferty rafi rafiq rafu raghnall ragin ragnar rahman rahul raibeart raidon raik raimy rainer rainier raivata raj rajan rajanikant rajendra rajesh rajiv rakesh raleigh ralph ralston ram raman ramanuja rambert rambures ramelan rameses ramesh ramiro ramon ramsay ramsden ramsey ranald rance rand randal randall randell randie randolph randy ranen ranger rangi ranjan ranjeet ranjit rankin ransford ransley ransom rantidev ranulf raoul raphael rarna rashad rashid rashne rasmus rastus rata ratri rauf raul raven ravi ravid ravindra ravinger rawiri rawley rawlins rawly rawson ray rayan rayburn rayford raymon raymond raymundo raynard rayner raynold razi read reading reagan rearden reardon rebel redford redman redmond redmund reece reed reede rees reese reeve refugio regan reggie regin reginald regis rehan reid reilly reinaldo reinhard reinhold remington remus remy renaldo renard renato renaud rendor rene renfred renfrew rennard renny renshaw rené reuben reuel rex rey reyes reynaldo reynard reynold rhett rhisiart rhodes rhodri rhun rhydwyn rhys rian ric ricardo rich richard richie richman rick ricker rickey rickie ricky rico rider ridgeway ridgley ridley rigby rigg rigoberto rikard rikin riley rimon rinaldo ring ringo rinzen riordan ripley rishab rishi rishley rishon risley riston ritchell ritchie ritter rivers rizal roald roan roarke rob robbie robby robert roberto robertson robi robin robinson robt robyn rocco rochester rock rockley rockwell rocky rod rodd roddie roddy roden roderick roderigo rodger rodman rodney rodolf rodolfo rodrick rodrigo rodrigue rodwell rogan rogelio roger rohan rohit roland rolando roldan rolf rolland rollo roly roman romeo romney romulus ron ronak ronald ronan rongo roni ronin ronit ronnie ronny ronson rooney roosevelt roper rory rosario roscoe rosencrantz rosendo roshan roslin ross roswald roth rothwell rourke routledge rowell rowland rowley rowse rowson roxbury roy royal royce roydon royston ruark ruben rubens rubin ruchir rudd rudi rudolf rudolph rudy rudyard rueben ruel rufford rufus rugby ruhinda rumford runako rune rupert rupesh rurik rush rushford ruskin russ russel russell rusty rutger rutherford rutland rutledge rutley 
ryan rycroft ryder rylan ryle ryley rylie ryman ryoichi ryozo ryton ryuichi saben sabir sabola sabre sacha sachchit sacheverell sachiel sachin sadi sadik sadurni safak safford sagar sage sagiv sahadev sahale sahara sahen sahib sahnan saidi sakda sakima sal salah salerio salih salim salisbury salman saloman salomo salomon salter salton salvador salvatore sam sami samien samir sammie sammon sammy sampath sampson samson samual samudra samuel samuele sanat sanborn sancho sandeep sanders sandie sandler sandon sandor sandro sandy sanford sang sanjay sanjeev sanjog sankara sansom sansone santiago santino santo santon santos santosh santoso sapan sarasvan sarat sargent sarkis sarngin sarni sarojin sarosh sascha sasha saswata satayu satruijt saturnino saturninus satyen saudeep saul saunak saunders saunderson saurabh saviero saville sawyer sawyl saxby saxon saxton sayed sayer scarus schuyler scipio scot scott scottie scotty scully seabert seaborne seabrook sealey seamus sean seanan searle seaton sebastian sebes secundus sedgewick sedgley seeley seely seff sefton seger seif seiichi selby seldon seleucus selig selim selwyn semih sempronius semyon senach senajit senan sencer senichi sennett senon septimus serafino seraphim sercan serge sergeant sergent sergio sergius servan sesto seth setiawan seton seung sevastian severin severino severn sevilin seville seward sexton sextus seyed seymour seyton sezni shad shadwell shafiq shah shahar shailen shailesh shakar shakir shalabh shalin shallow shalom shaman shamus shanahan shandy shane shankar shanley shannon shantanu sharad sharif sharma shashi shashwat shaughan shaun shaw shawn shay shayne shea sheehan sheffield shelby sheldon shelley shelton shen shepherd sher sherborne sheridan sheridon sherlock sherman sherwin sherwood shigekazu shiloh shima shimon shing shinichi shipley shipton shirley shishir shiv shiva shlomo shmuel shoichi sholto shomari shon shrey shuichi shulamith shunichi shunnar shvetank shyam shylock siamak sid siddartha siddel siddharth sidell sidney siegbert siegfried sigebryht sigfried siggy sigi sigmund sigurd silas siler silvanus silvester silvio silvius simeon similien simon simonides simple simpson sinan sinbad sinclair singh sinjon sinnett sinnott siva sivan siward skeet skelly skelton skene skipp skipper sklaer slade slate slender slevin sloan sly smedley smith snehal snorre snout snowden snug socrates sofronio soham sol solan solanio solinus solomon solon solt solve solyom soma somerled somerset somerville son sonam sonnagh sonny sophocles soren sorley sorrell soterios southwell spalding sparke speed spence spencer spike spiridon squire sridhar srijan srikant srinath srinivas sriram stacey stack stacy staffan stafford stamford stan stanbury standen standish stanfield stanford stanhope stanislaus stanislav stanislaw stanko stanley stanton stanwick starbuck starr starveling stavros stedman steele stefan stefanos steffan stein sten stenton stephan stephano stephane stephen sterling sterne stert steve steven stevenson stevie stewart stig stiles stillman stinson stiofan stockley stockton stoddard stoke stokley storm storr stowe stoyan strahan stratford strato stratton strom stroud stu stuart studs sturt styles subodh sudarshan sudesha sudeva sudhansu sudhir sudi suffield sugriva sukarman sukumar sulaiman suleiman sulio sullivan sully sulwyn sumadhur suman sumantu sumati sumit sumner sun sundara sung sunil suresh surya sutcliffe sutherland sutton suvrata svein sven swagat swain swapnil sweeney sweeny swindon swinford swithin sycamore 
sydenham sydney sykes sylvain sylvan sylvester symon synclair syrus szabolcs szalok szemere szervec szesce szevor szilord szolot szymon sören taavi tab tabansi tabari tabb tabor tad tadc tadd taddeo tadi taffy taggart taha tahir tai tailer tailor tait tajo takai takoda taksa taksony taku talbot talfryn talib taliesin tallis talman talon talorg talos tam tama tamas tamer tamino tamir tamme tammo tancred tancredo tane tanek taner tangaroa tangwyn tanicus tannar tanner tano tapan tapesh tara tarang tarasios tarcal tardos taree tarik tariq tarkan tarn taro tarquin tarrant tarun tas tashi tate tathal tathan tatum taurin taurinus taurus tavis tavish tawhiri taylan taylor teague teal tean tearlach tecer tecwyn ted teddie teddy tee tej telford telo tem teman temani templar tenenan tennyson tenzin tenzing teodor teodoro tercan terence terrance terrell terrence terry thad thaddeus thanh theo theodor theodore theron thilo thomas thurman tiago till tilo tim timm timmy timo timon timothy timur tino titus tizian tjark tobias toby tod todd tom tomas tome tomi tomkin tommie tommy tomo toney tong tony tor tore torin tormey tormod torquil torr torrance torsten torvald tory toste tostig touchstone toussaint tovi townley townsend toyo tracey tracy trahaearn traherne tran tranio tranter travis trefor trefusis trelawney tremayne tremeur trent trenton trenus tresco trethowan trevelyan trevor trey trigve trilby trinculo trinidad trinity tripp trisanu tristan tristen tristram troilus trowbridge troy truman tryggvi trystan tuan tuart tuathal tubal tucker tudfwlch tudi tudor tudur tugdual tujan tuncer tungyr ture turi turiau turner turpin tushar tuvya tuyen twain twyford tyack tybalt tycho tye tyee tyler tymon tynan tyne tyree tyrek tyrell tyrol tyron tyrone tyrus tyson tytus tzuriel uba ubul udeh udell udit udo udolf uehudah ufuk ugo ugod ugor ugur uilleam uisdean uland ulbrecht ulf ulffr ulfred ulmer ulprus ulrich ultan ulucan ulysses umar umberto umut unai unni unwin upen upor upravda upton upwood uranio urban uren uri uriah urie uriel urien urjavaha uros ursel urson urvan usamah ushnisha usko usman utah utt uttam uttanka utz uwain uwan uwe uxio uyeda uzi uzor uzziah vachel vaclav vadim vahe vaibhav vail vairaja val valdemar valentin valentine valentino valeray valerian vallis valter vamana van vance vane vaninadh vanya varad varden varian varick variya varkony varocher varrius varsony vartan varun varuna vasant vasava vasilios vasily vasu vasudev vasudeva vasuman vaughan vaughn vavrin vavrinec vayk vazsony vedanga vedmundr veer veit vekoslav vencel vencentio venec venn ventidius vere vered verges vern verner vernon verrell viau vicente victor vid vidal vidor vidvan vidya vijay vikas vikram vikrant viktor vilem vilhehn viljalmr vilmos vimal vin vinay vince vincent vincentio vincenzo vineet vinnie vinny vinod vinson vinzenz vipin vipul viraj virasana virat virgil virgilio visant vishal vishnu visvajit visvakarman visvayu viswanath vitale vito vittore vittorio vitus vivatma vivek vivian vladilen vladimir vladislav volf volker volney voltimand von vortigern voteporix vougay vyasa vyvyan wade wagner wahib wahnond wain waine wainwright waite wakefield wakeman walby walchelim waldemar walden waldo walenty waleran walford walid walker wallace wally walmer walpole walsh walter walton walwyn waman wang warburton ward wardell warfield warley warmund warner warra warrain warren warrigal warrun warton warwick waseem washington wasim wassily watkin watson waverley waverly wayde wayland waylon wayne webb webster welby welch 
welcome weldon welford wellington wells welsh wen wenceslas wendell wendron wenlock wentworth werner wes wesley west weston westwood wetherby weylin weymouth wheatley wheaton wheeler whetu whit whitby whitcombe whitfield whitford whitley whitmore whitney whittaker wickham wid wieslav wihtred wilber wilbert wilbur wilburn wiley wilford wilfred wilfredo wilhelm wilkes wilkie will willard william williams willian willie willis willoughby willy wilmer wilmot wilson wilton wim winchester windsor winford winfred winog winslow winston winter winthrop winton wirrin wistan witton wolf wolfe wolfgang wolfram wolter woodburn woodley woodrow woodward woody woorak woorin worcester wren wright wulfhere wulfnoth wyatt wyber wyburn wye wykeham wylie wyman wyndam wyndham wynford wynn wynston wynton wyome wystan xan xanthus xanti xaver xavier xenophon xenos xerxes xiao ximen ximenes ximens ximun xurxo xylon yadon yael yahto yakecan yakov yale yan yancy yanis yannick yannis yaphet yardley yarin yarn yarran yasar yash yashodhar yashovarman yashpal yasin yasir yassin yasuo yates yazid yefrem yehuda yehudi yener yered yerik yervant yeshaya yeshe yestin yevgeni yigael yigit yileen yitzaak yitzak yngvar ynyr yobachi yoel yogendra yogesh yogi yona yong yoram yorick york yosef yoshifumi yoshimitsu yoshiyuki younes young ysgarran yuan yudhajit yuka yukio yul yule yuma yunus yuri yusuf yutaka yves zabulon zaccheo zachariah zacharias zachary zachery zack zackary zadok zador zafar zafer zagger zagon zahin zahir zahneny zahur zaid zaide zajzon zakai zaki zalan zale zalman zamir zander zane zanebono zani zaniel zanipolo zann zanobi zarand zared zarek zavier zayden zazu zbigniew zdenek zebadiah zebedeo zebulon zebulun zechariah zedekiah zeeman zefirino zeke zeki zeko zelig zelipe zelman zelotes zenaide zenas zennor zeno zenoa zenobio zenon zenos zephan zephaniah zeren zerind zero zeroun zeth zetony zeus zev zia ziff ziggy zigmond zigmund zion zircon ziv ziven zobor zoello zoilo zoland zoltan zombor zoran zoroaster zorro zosimo zotico zowie zsigmond zsolt zubin zuriel zurl Ömer diff --git a/modules/bibauthorid/lib/bibauthorid_backinterface.py b/modules/bibauthorid/lib/bibauthorid_backinterface.py index 144aa20f9..f956c1878 100644 --- a/modules/bibauthorid/lib/bibauthorid_backinterface.py +++ b/modules/bibauthorid/lib/bibauthorid_backinterface.py @@ -1,168 +1,115 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' bibauthorid_frontinterface This file aims to filter and modify the interface given by bibauthorid_bdinterface in order to make it usable by the backend so to keep it as clean as possible. 
'''

from itertools import groupby
from operator import itemgetter

-import bibauthorid_config as bconfig
-
-from bibauthorid_dbinterface import get_recently_modified_record_ids as get_papers_recently_modified #emitting
-from bibauthorid_dbinterface import get_user_log #emitting
-from bibauthorid_dbinterface import insert_user_log #emitting
-from bibauthorid_dbinterface import get_all_names_from_personid #emitting
-from bibauthorid_dbinterface import get_key_words #emitting
-from bibauthorid_dbinterface import bib_matrix #emitting
-from bibauthorid_dbinterface import get_name_by_bibrecref #emitting
-from bibauthorid_dbinterface import get_new_personid #emitting
-from bibauthorid_dbinterface import get_deleted_papers #emitting
-from bibauthorid_dbinterface import get_authors_from_paper #emitting
-from bibauthorid_dbinterface import get_coauthors_from_paper #emitting
-from bibauthorid_dbinterface import delete_paper_from_personid #emitting
-from bibauthorid_dbinterface import get_signatures_from_rec #emitting
-from bibauthorid_dbinterface import modify_signature #emitting
-from bibauthorid_dbinterface import remove_sigs #emitting
-from bibauthorid_dbinterface import find_pids_by_name #emitting
-from bibauthorid_dbinterface import new_person_from_signature #emitting
-from bibauthorid_dbinterface import add_signature #emitting
-from bibauthorid_dbinterface import copy_personids #emitting
-from bibauthorid_dbinterface import get_all_bibrecs #emitting
-from bibauthorid_dbinterface import remove_all_bibrecs #emitting
-from bibauthorid_dbinterface import save_cluster #emitting
-from bibauthorid_dbinterface import empty_results_table #emitting
-from bibauthorid_dbinterface import check_personid_papers #emitting
-from bibauthorid_dbinterface import get_bib10x, get_bib70x #emitting
-from bibauthorid_dbinterface import get_bibrefrec_subset #emitting
-from bibauthorid_dbinterface import update_personID_canonical_names #emitting
-from bibauthorid_dbinterface import get_full_personid_papers #emitting
-from bibauthorid_dbinterface import get_full_results #emitting
-from bibauthorid_dbinterface import personid_get_recids_affected_since #emitting
-from bibauthorid_dbinterface import get_existing_personids #emitting
-from bibauthorid_dbinterface import get_lastname_results #emitting
-from bibauthorid_dbinterface import personid_name_from_signature #emitting
-from bibauthorid_dbinterface import get_existing_result_clusters #emitting
-from bibauthorid_dbinterface import remove_personid_papers #emitting
-from bibauthorid_dbinterface import repair_personid #emitting
-from bibauthorid_dbinterface import get_sql_time #emitting
-from bibauthorid_dbinterface import find_pids_by_exact_name #emitting
-from bibauthorid_dbinterface import get_all_papers_of_pids #emitting
-from bibauthorid_dbinterface import get_claimed_papers #emitting
-from bibauthorid_dbinterface import remove_result_cluster #emitting
-from bibauthorid_dbinterface import filter_modified_record_ids #emitting
-from bibauthorid_dbinterface import personid_from_signature #emitting
-from bibauthorid_dbinterface import move_signature #emitting
-from bibauthorid_dbinterface import find_conflicts #emitting
-from bibauthorid_dbinterface import get_signature_info #emitting
-from bibauthorid_dbinterface import in_results
-from bibauthorid_dbinterface import check_results #emitting
-from bibauthorid_dbinterface import check_merger #emitting
-
-from search_engine import perform_request_search
+
+# Well, this is bad, BUT otherwise there must be 100+ lines
+# of the form: from dbinterface import ... # emitting
+from bibauthorid_dbinterface import *
+
import bibauthorid_dbinterface as dbinter


def group_personid(papers_table="aidPERSONID_PAPERS", data_table="aidPERSONID_DATA"):
    '''
    Extracts, groups and returns the whole personid.
    '''
    papers = dbinter.get_full_personid_papers(papers_table)
    data = dbinter.get_full_personid_data(data_table)

    group = lambda x: groupby(sorted(x, key=itemgetter(0)), key=itemgetter(0))
    to_dict = lambda x: dict((pid, map(itemgetter(slice(1, None)), data)) for pid, data in x)

    return (to_dict(group(papers)), to_dict(group(data)))


def compare_personid_tables(personIDold_papers, personIDold_data,
                            personIDnew_papers, personIDnew_data, fp):
    """
    Compares how personIDnew differs from personIDold.
    The two arguments must be generated with group_personid.
    fp must be a valid file object.
    """
    header_new = "+++ "
    # header_old = "    "
    header_removed = "--- "

    def write_new_personid(pid):
        fp.write(" Personid %d\n" % pid)

    def write_end_personid():
        fp.write("\n")

    def write_paper(row, header):
        fp.write("%s[PAPER] %s, signature %s %d %d, flag: %d, lcul: %d\n"
                 % (header, row[3], row[0], row[1], row[2], row[4], row[5]))

    def write_data(row, header):
        tag = "[%s]" % row[0].upper()
        fp.write("%s%s %s, opt: (%s %s %s)\n"
                 % (header, tag, row[1], row[2], row[3], row[4]))

    all_pids = (frozenset(personIDold_data.keys()) |
                frozenset(personIDnew_data.keys()) |
                frozenset(personIDold_papers.keys()) |
                frozenset(personIDnew_papers.keys()))

    for pid in all_pids:
        data_old = frozenset(personIDold_data.get(pid, frozenset()))
        data_new = frozenset(personIDnew_data.get(pid, frozenset()))

        # old_data = data_new & data_old
        new_data = data_new - data_old
        del_data = data_old - data_new

        papers_old = frozenset(personIDold_papers.get(pid, frozenset()))
        papers_new = frozenset(personIDnew_papers.get(pid, frozenset()))

        # old_papers = papers_new & papers_old
        new_papers = papers_new - papers_old
        del_papers = papers_old - papers_new

        if new_data or del_data or new_papers or del_papers:
            write_new_personid(pid)

            for arr, header in zip([new_data, del_data], [header_new, header_removed]):
                for row in arr:
                    write_data(row, header)

            for arr, header in zip([new_papers, del_papers], [header_new, header_removed]):
                for row in arr:
                    write_paper(row, header)

            write_end_personid()


def filter_bibrecs_outside(all_papers):
    all_bibrecs = get_all_bibrecs()
    to_remove = list(frozenset(all_bibrecs) - frozenset(all_papers))
    chunk = 1000
    separated = [to_remove[i: i + chunk] for i in range(0, len(to_remove), chunk)]
    for sep in separated:
        remove_all_bibrecs(sep)


-def get_all_valid_bibrecs():
-    collection_restriction_pattern = " or ".join(["980__a:\"%s\"" % x for x in bconfig.LIMIT_TO_COLLECTIONS])
-    return perform_request_search(p="%s" % collection_restriction_pattern, rg=0)
-
-
diff --git a/modules/bibauthorid/lib/bibauthorid_cluster_set.py b/modules/bibauthorid/lib/bibauthorid_cluster_set.py
index cd863e47c..8f056a369 100644
--- a/modules/bibauthorid/lib/bibauthorid_cluster_set.py
+++ b/modules/bibauthorid/lib/bibauthorid_cluster_set.py
@@ -1,240 +1,285 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

-from itertools import chain, groupby
+from itertools import chain, groupby, izip, cycle
from operator import itemgetter

from bibauthorid_matrix_optimization import maximized_mapping
from bibauthorid_backinterface import save_cluster
from bibauthorid_backinterface import get_all_papers_of_pids
from bibauthorid_backinterface import get_bib10x, get_bib70x
-from bibauthorid_backinterface import get_all_valid_bibrecs
-from bibauthorid_backinterface import get_bibrefrec_subset
-from bibauthorid_backinterface import remove_result_cluster
+from bibauthorid_backinterface import get_all_modified_names_from_personid
+from bibauthorid_backinterface import get_signatures_from_bibrefs
from bibauthorid_name_utils import generate_last_name_cluster_str


-class Blob:
+class Blob(object):
    def __init__(self, personid_records):
        '''
        @param personid_records: A list of tuples: (personid, bibrefrec, flag).
            Notice that all bibrefrecs should be the same
            since the Blob represents only one bibrefrec.
        '''
        self.bib = personid_records[0][1]
        assert all(p[1] == self.bib for p in personid_records)
        self.claimed = set()
        self.assigned = set()
        self.rejected = set()
        for pid, unused, flag in personid_records:
            if flag > 1:
                self.claimed.add(pid)
            elif flag >= -1:
                self.assigned.add(pid)
            else:
                self.rejected.add(pid)


def create_blobs_by_pids(pids):
    '''
    Returns a list of blobs by a given set of personids.
    Blob is an object which describes all information
    for a bibrefrec in the personid table.
    @type pids: iterable of integers
    '''
    all_bibs = get_all_papers_of_pids(pids)
    all_bibs = ((x[0], (int(x[1]), x[2], x[3]), x[4]) for x in all_bibs)
    bibs_dict = groupby(sorted(all_bibs, key=itemgetter(1)), key=itemgetter(1))
    blobs = [Blob(list(bibs)) for unused, bibs in bibs_dict]

    return blobs


def group_blobs(blobs):
    '''
    Separates the blobs into two groups of objects -
    those with claims and those without.
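
    A minimal illustrative sketch (b1 and b2 are hypothetical blobs:
    b1 carries a claim, b2 only an algorithmic assignment):

        union, independent = group_blobs([b1, b2])
        # union       -> [(b1.bib, claimed_pid)]
        # independent -> [(b2.bib, assigned_pid, list(b2.rejected))]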
    '''
    # created from blobs, which are claimed
    # [(bibrefrec, personid)]
    union = []

    # created from blobs, which are not claimed
    # [(bibrefrec, personid/None, [personid])]
    independent = []

    for blob in blobs:
        assert len(blob.claimed) + len(blob.assigned) == 1
        if len(blob.claimed) > 0:
            union.append((blob.bib, list(blob.claimed)[0]))
        else:
            independent.append((blob.bib, list(blob.assigned)[0], list(blob.rejected)))

    return (union, independent)


-class Cluster_set:
-    class Cluster:
+class ClusterSet(object):
+    class Cluster(object):
        def __init__(self, bibs, hate = []):
            # hate is a symmetrical relation
            self.bibs = set(bibs)
            self.hate = set(hate)

        def hates(self, other):
            return other in self.hate

        def quarrel(self, cl2):
            self.hate.add(cl2)
            cl2.hate.add(self)

        def _debug_test_hate_relation(self):
            for cl2 in self.hate:
                if not self.hates(cl2) or not cl2.hates(self):
                    return False
            return True

    def __init__(self):
        self.clusters = []
+        self.update_bibs()
+
+    def update_bibs(self):
+        self.num_all_bibs = sum(len(cl.bibs) for cl in self.clusters)
+
+    def all_bibs(self):
+        return chain.from_iterable(cl.bibs for cl in self.clusters)

    def create_skeleton(self, personids, last_name):
        blobs = create_blobs_by_pids(personids)
        self.last_name = last_name

        union, independent = group_blobs(blobs)

        union_clusters = {}
        for uni in union:
            union_clusters[uni[1]] = union_clusters.get(uni[1], []) + [uni[0]]

        cluster_dict = dict((personid, self.Cluster(bibs)) for personid, bibs in union_clusters.items())
        self.clusters = cluster_dict.values()

        for i, cl in enumerate(self.clusters):
            cl.hate = set(chain(self.clusters[:i], self.clusters[i+1:]))

        for ind in independent:
            bad_clusters = [cluster_dict[i] for i in ind[2] if i in cluster_dict]
            cl = self.Cluster([ind[0]], bad_clusters)
            for bcl in bad_clusters:
                bcl.hate.add(cl)
            self.clusters.append(cl)

+        self.update_bibs()
+        return self

    # Creates a cluster set, ignoring the claims and the
    # rejected papers.
    def create_pure(self, personids, last_name):
        blobs = create_blobs_by_pids(personids)
        self.last_name = last_name

        self.clusters = [self.Cluster((blob.bib,)) for blob in blobs]
+        self.update_bibs()
+        return self

    # no longer used
    def create_body(self, blobs):
        union, independent = group_blobs(blobs)

        arranged_clusters = {}
        for cls in chain(union, independent):
            arranged_clusters[cls[1]] = arranged_clusters.get(cls[1], []) + [cls[0]]

        for pid, bibs in arranged_clusters.items():
            cl = self.Cluster(bibs)
            cl.personid = pid
            self.clusters.append(cl)
+        self.update_bibs()
+        return self
+
+    def create_from_mark(self, bibrefs, last_name):
+        bibrecrefs = get_signatures_from_bibrefs(bibrefs)
+        self.clusters = [ClusterSet.Cluster([bib]) for bib in bibrecrefs]
+        self.last_name = last_name
+        self.update_bibs()
+        return self

    # a *very* slow function checking whether the hate relation is still symmetrical
    def _debug_test_hate_relation(self):
        for cl1 in self.clusters:
            if not cl1._debug_test_hate_relation():
                return False
        return True

    # similar to the function above
    def _debug_duplicated_recs(self, mapping=None):
        for cl in self.clusters:
            if mapping:
                setty = set(mapping[x][2] for x in cl.bibs)
            else:
                setty = set(x[2] for x in cl.bibs)

            if len(cl.bibs) != len(setty):
                return False
        return True

    # No longer used, but it might be handy.
    @staticmethod
    def match_cluster_sets(cs1, cs2):
        """
        This function tries to generate the best matching
        between cs1 and cs2 according to the shared bibrefrecs.
        It returns a dictionary with keys, clusters in cs1,
        and values, clusters in cs2.
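
        As an illustration (with hypothetical clusters): if cluster cl_a
        in cs1 shares two bibrefrecs with cl_x in cs2 and none with the
        other clusters, maximized_mapping pairs the two and the returned
        dictionary contains the entry {cl_a: cl_x}.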
        @param and type of cs1 and cs2: cluster_set
        @return: dictionary with the matching clusters.
        @return type: { cluster : cluster }
        """
        matr = [[len(cl1.bibs & cl2.bibs) for cl2 in cs2.clusters] for cl1 in cs1.clusters]
        mapping = maximized_mapping(matr)
        return dict((cs1.clusters[mappy[0]], cs2.clusters[mappy[1]]) for mappy in mapping)

-
    def store(self):
        '''
        Stores the cluster set in a special table.
        This is used to store the results of
        tortoise/wedge in a table and later
        merge them with personid.
        '''
-        remove_result_cluster("%s." % self.last_name)
        named_clusters = (("%s.%d" % (self.last_name, idx), cl) for idx, cl in enumerate(self.clusters))
        map(save_cluster, named_clusters)


-def cluster_sets_from_marktables():
-    # { (100, 123) -> name }
-    ref100 = get_bib10x()
-    ref700 = get_bib70x()
-    bibref_2_name = dict([((100, ref), generate_last_name_cluster_str(name)) for ref, name in ref100] +
-                         [((700, ref), generate_last_name_cluster_str(name)) for ref, name in ref700])
+def delayed_create_from_mark(bibrefs, last_name):
+    def ret():
+        return ClusterSet().create_from_mark(bibrefs, last_name)
+    return ret

-    all_recs = get_all_valid_bibrecs()
-    all_bibrefrecs = chain(set((100, ref, rec) for rec, ref in get_bibrefrec_subset(100, all_recs, map(itemgetter(0), ref100))),
-                           set((700, ref, rec) for rec, ref in get_bibrefrec_subset(700, all_recs, map(itemgetter(0), ref700))))
+def delayed_cluster_sets_from_marktables():
+    # { name -> [(table, bibref)] }
+    name_bucket = {}
+    for tab, ref, name in chain(izip(cycle((100,)), *izip(*get_bib10x())),
+                                izip(cycle((700,)), *izip(*get_bib70x()))):
+        name = generate_last_name_cluster_str(name)
+        name_bucket[name] = name_bucket.get(name, []) + [(tab, ref)]
+
+    all_refs = ((name, refs, len(list(get_signatures_from_bibrefs(refs))))
+                for name, refs in name_bucket.items())
+    all_refs = sorted(all_refs, key=itemgetter(2))
+    return ([delayed_create_from_mark(refs, name) for name, refs, size in all_refs],
+            map(itemgetter(0), all_refs),
+            map(itemgetter(2), all_refs))
+
+
+def create_lastname_list_from_personid(last_modification):
+    '''
+    This function generates a dictionary from a last name
+    to the list of personids which have this last name.
+    '''
+    # ((personid, [full Name1], Nbibs) ... )
+    all_names = get_all_modified_names_from_personid(last_modification)

-    last_name_2_bibs = {}
+    # ((personid, last_name, Nbibs) ... )
+    all_names = ((row[0], generate_last_name_cluster_str(iter(row[1]).next()), row[2])
+                 for row in all_names)

-    for bibrefrec in all_bibrefrecs:
-        table, ref, unused = bibrefrec
-        name = bibref_2_name[(table, ref)]
-        last_name_2_bibs[name] = last_name_2_bibs.get(name, []) + [bibrefrec]
+    # { (last_name, [(personid)... ], Nbibs) ... }
+    all_names = groupby(sorted(all_names, key=itemgetter(1)), key=itemgetter(1))
+    all_names = ((key, list(data)) for key, data in all_names)
+    all_names = ((key, map(itemgetter(0), data), sum(x[2] for x in data)) for key, data in all_names)

-    cluster_sets = []
+    return all_names

-    for name, bibrecrefs in last_name_2_bibs.items():
-        new_cluster_set = Cluster_set()
-        new_cluster_set.clusters = [Cluster_set.Cluster([bib]) for bib in bibrecrefs]
-        new_cluster_set.last_name = name
-        cluster_sets.append(new_cluster_set)
-    return cluster_sets

+def delayed_create(create_f, pids, lname):
+    def ret():
+        return create_f(ClusterSet(), pids, lname)
+    return ret

+def delayed_cluster_sets_from_personid(pure, last_modification=None):
+    names = create_lastname_list_from_personid(last_modification)
+    names = sorted(names, key=itemgetter(2))
+    if pure:
+        create = ClusterSet.create_pure
+    else:
+        create = ClusterSet.create_skeleton
+    return ([delayed_create(create, name[1], name[0]) for name in names],
+            map(itemgetter(0), names),
+            map(itemgetter(2), names))
diff --git a/modules/bibauthorid/lib/bibauthorid_comparison.py b/modules/bibauthorid/lib/bibauthorid_comparison.py
index 3c7ad68c1..e1c3a61fb 100644
--- a/modules/bibauthorid/lib/bibauthorid_comparison.py
+++ b/modules/bibauthorid/lib/bibauthorid_comparison.py
@@ -1,382 +1,383 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

import re
import bibauthorid_config as bconfig
from itertools import starmap
-from operator import itemgetter, mul
+from operator import mul
from bibauthorid_name_utils import compare_names
from bibauthorid_dbinterface import get_name_by_bibrecref
from bibauthorid_dbinterface import get_grouped_records
from bibauthorid_dbinterface import get_all_authors
from bibauthorid_dbinterface import get_collaboration
from bibauthorid_dbinterface import resolve_affiliation
from bibauthorid_backinterface import get_key_words
from bibrank_citation_searcher import get_citation_dict
from bibauthorid_general_utils import metadata_comparison_print

# This module is not thread safe!
# Be sure to use processes instead of
# threads if you need parallel
# computation!
-
# FIXME: hack for Python-2.4; switch to itemgetter() once Python-2.6 is default
# use_refrec = itemgetter(slice(None))
# use_ref = itemgetter(0, 1)
# use_rec = itemgetter(2)
use_refrec = lambda x: x
use_ref = lambda x: x[0:2]
use_rec = lambda x: x[2]

# At first glance this may look silly.
# However, if we load the dictionaries
# unconditionally there will be only
# one instance of them in the memory after
# fork
cit_dict = get_citation_dict("citationdict")
recit_dict = get_citation_dict("reversedict")

caches = []

def create_new_cache():
    ret = {}
    caches.append(ret)
    return ret

def clear_all_caches():
    for c in caches:
        c.clear()

_replacer = re.compile("[^a-zA-Z]")

def canonical_str(string):
    return _replacer.sub('', string).lower()

def jaccard(set1, set2):
    '''
    This is no longer jaccard distance.
    '''
    metadata_comparison_print("Jaccard: Found %d items in the first set." % len(set1))
    metadata_comparison_print("Jaccard: Found %d items in the second set." % len(set2))
    if not set1 or not set2:
        return '?'

    match = len(set1 & set2)
    ret = float(match) / float(len(set1) + len(set2) - match)

    metadata_comparison_print("Jaccard: %d common items." % match)
    metadata_comparison_print("Jaccard: returning %f." % ret)
    return ret

def cached_sym(reducing):
    '''
    Memoizes a pure function with two symmetrical arguments.
    '''
    def deco(func):
        cache = create_new_cache()
        def ret(a, b):
            ra, rb = reducing(a), reducing(b)
            if ra < rb:
                ra, rb = (ra, rb)
            else:
                ra, rb = (rb, ra)
            if (ra, rb) not in cache:
                cache[(ra, rb)] = func(a, b)
            return cache[(ra, rb)]
        return ret
    return deco

def cached_arg(reducing):
    '''
    Memoizes a pure function.
    '''
    def deco(func):
        cache = create_new_cache()
        def ret(a):
            ra = reducing(a)
            if ra not in cache:
                cache[ra] = func(a)
            return cache[ra]
        return ret
    return deco

# The main function of this module
def compare_bibrefrecs(bibref1, bibref2):
    '''
    This function compares two bibrefrecs (100:123,456)
    using all metadata and returns:
        * a pair with two numbers in [0, 1] - the probability that the two
          belong together and the ratio of the metadata functions used to
          the number of all metadata functions.
        * '+' - the metadata showed us that the two belong together for sure.
        * '-' - the metadata showed us that the two do not belong together
          for sure.
    Example:
        '(0.7, 0.4)' - 2 out of 5 functions managed to compare the
            bibrefrecs and using their computations the average value
            of 0.7 is returned.
        '-' - the two bibrefrecs are in the same paper, so they don't
            belong together for sure.
        '(1, 0)' - there was insufficient metadata to compare the
            bibrefrecs. (The first value is ignored.)
    '''
    # try first the metrics, which might return + or -
    papers = _compare_papers(bibref1, bibref2)
    if papers != '?':
        return papers

-#    if bconfig.CFG_INSPIRE_SITE:
-#        insp_ids = _compare_inspireid(bibref1, bibref2)
-#        if insp_ids != '?':
-#            return insp_ids
+    if bconfig.CFG_INSPIRE_SITE:
+        insp_ids = _compare_inspireid(bibref1, bibref2)
+        if insp_ids != '?':
+            return insp_ids, 1.

    # unfortunately, we have to do all comparisons
    if bconfig.CFG_INSPIRE_SITE:
        func_weight = (
            (_compare_affiliations, 1.),
            (_compare_names, 5.),
            (_compare_citations, .5),
            (_compare_citations_by, .5),
            (_compare_key_words, 2.),
        )
    elif bconfig.CFG_ADS_SITE:
        func_weight = (
            (_compare_email, 3.),
            (_compare_unified_affiliations, 2.),
            (_compare_names, 5.),
            # register(_compare_citations, .5)
            # register(_compare_citations_by, .5)
            (_compare_key_words, 2.)
        )
    else:
        func_weight = ((_compare_names, 5.),)

    results = [(func(bibref1, bibref2), weight) for func, weight in func_weight]

    coll = _compare_collaboration(bibref1, bibref2)
    if coll == '?':
        coll = _compare_coauthors(bibref1, bibref2)
    results.append((coll, 3.))

    total_weights = sum(res[1] for res in results)

-    metadata_comparison_print("Final vector: %s." % str(results))

    results = filter(lambda x: x[0] != '?', results)

    if not results:
        return 0, 0

    cert = sum(starmap(mul, results))
    prob = sum(res[1] for res in results)
    return cert / prob, prob / total_weights


@cached_arg(use_refrec)
def _find_affiliation(bib):
    aff = get_grouped_records(bib, str(bib[0]) + '__u').values()[0]
    return set(canonical_str(a) for a in aff)


def _compare_affiliations(bib1, bib2):
    metadata_comparison_print("Comparing affiliations.")

    aff1 = _find_affiliation(bib1)
    aff2 = _find_affiliation(bib2)

    ret = jaccard(aff1, aff2)
    return ret

+
@cached_arg(use_refrec)
def _find_unified_affiliation(bib):
    aff = get_grouped_records(bib, str(bib[0]) + '__u').values()[0]
    return set(x for x in list(canonical_str(resolve_affiliation(a)) for a in aff) if not x == "None")


def _compare_unified_affiliations(bib1, bib2):
    metadata_comparison_print("Comparing affiliations.")

    aff1 = _find_unified_affiliation(bib1)
    aff2 = _find_unified_affiliation(bib2)

    ret = jaccard(aff1, aff2)
    return ret


@cached_arg(use_refrec)
def _find_inspireid(bib):
    ids = get_grouped_records(bib, str(bib[0]) + '__i').values()[0]
    return set(ids)


def _compare_inspireid(bib1, bib2):
    metadata_comparison_print("Comparing inspire ids.")

    iids1 = _find_inspireid(bib1)
    iids2 = _find_inspireid(bib2)

    metadata_comparison_print("Found %d, %d different inspire ids for the two sets." % (len(iids1), len(iids2)))
    if (len(iids1) != 1 or
        len(iids2) != 1):
        return '?'
    elif iids1 == iids2:
        metadata_comparison_print("The ids are the same.")
-        return '+'
+        return 1.
    else:
        metadata_comparison_print("The ids are different.")
-        return '-'
+        return 0.


@cached_arg(use_refrec)
def _find_email(bib):
    ids = get_grouped_records(bib, str(bib[0]) + '__m').values()[0]
    return set(ids)


def _compare_email(bib1, bib2):
    metadata_comparison_print("Comparing email addresses.")

    iids1 = _find_email(bib1)
    iids2 = _find_email(bib2)

    metadata_comparison_print("Found %d, %d different email addresses for the two sets." % (len(iids1), len(iids2)))
    if (len(iids1) != 1 or
        len(iids2) != 1):
        return '?'
    elif iids1 == iids2:
        metadata_comparison_print("The addresses are the same.")
        return 1.0
    else:
        metadata_comparison_print("The addresses are there, but different.")
        return 0.3


def _compare_papers(bib1, bib2):
    metadata_comparison_print("Checking if the two bib refs are in the same paper.")
    if bib1[2] == bib2[2]:
        return '-'
    return '?'


get_name_by_bibrecref = cached_arg(use_ref)(get_name_by_bibrecref)


@cached_sym(use_ref)
def _compare_names(bib1, bib2):
    metadata_comparison_print("Comparing names.")

    name1 = get_name_by_bibrecref(bib1)
    name2 = get_name_by_bibrecref(bib2)

    if name1 and name2:
        return compare_names(name1, name2, False)
    return '?'


@cached_arg(use_rec)
def _find_key_words(bib):
    words = get_key_words(bib[2])
    return set(canonical_str(word) for word in words)


@cached_sym(use_rec)
def _compare_key_words(bib1, bib2):
    metadata_comparison_print("Comparing key words.")
    words1 = _find_key_words(bib1)
    words2 = _find_key_words(bib2)

    return jaccard(words1, words2)


@cached_arg(use_rec)
def _find_collaboration(bib):
    colls = get_collaboration(bib[2])
    return set(canonical_str(coll) for coll in colls)


@cached_sym(use_rec)
def _compare_collaboration(bib1, bib2):
    metadata_comparison_print("Comparing collaboration.")

    colls1 = _find_collaboration(bib1)
    colls2 = _find_collaboration(bib2)

    metadata_comparison_print("Found %d, %d different collaborations for the two sets." % (len(colls1), len(colls2)))
    if (len(colls1) != 1 or
        len(colls2) != 1):
        return '?'
    elif colls1 == colls2:
        return 1.
    else:
        return 0.
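
# A minimal usage sketch of the two decorators above; `_compare_example`
# is hypothetical, not part of this module. `cached_sym` orders the
# reduced keys, so a comparison keyed with `use_rec` is computed only
# once per unordered pair of record ids:
#
#     @cached_sym(use_rec)
#     def _compare_example(bib1, bib2):
#         return jaccard(_find_key_words(bib1), _find_key_words(bib2))
#
#     _compare_example((100, 1, 5), (700, 2, 8))  # computed and cached
#     _compare_example((700, 2, 8), (100, 1, 5))  # served from the cache
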
@cached_arg(use_rec)
def _find_coauthors(bib):
    return set(canonical_str(a) for a in get_all_authors(bib[2]))


@cached_sym(use_rec)
def _compare_coauthors(bib1, bib2):
    metadata_comparison_print("Comparing authors.")

    aths1 = _find_coauthors(bib1)
    aths2 = _find_coauthors(bib2)

    return jaccard(aths1, aths2)


@cached_arg(use_rec)
def _find_citations(bib):
    return set(cit_dict.get(bib[2], ()))


@cached_sym(use_rec)
def _compare_citations(bib1, bib2):
    metadata_comparison_print("Comparing citations.")

    cites1 = _find_citations(bib1)
    cites2 = _find_citations(bib2)

    return jaccard(cites1, cites2)


@cached_arg(use_rec)
def _find_citations_by(bib):
    return set(recit_dict.get(bib[2], ()))


@cached_sym(use_rec)
def _compare_citations_by(bib1, bib2):
    metadata_comparison_print("Comparing citations by.")

    cites1 = _find_citations_by(bib1)
    cites2 = _find_citations_by(bib2)

    return jaccard(cites1, cites2)
+
+
diff --git a/modules/bibauthorid/lib/bibauthorid_config.py b/modules/bibauthorid/lib/bibauthorid_config.py
index 491a2586d..7b7fb4d61 100644
--- a/modules/bibauthorid/lib/bibauthorid_config.py
+++ b/modules/bibauthorid/lib/bibauthorid_config.py
@@ -1,168 +1,193 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
bibauthorid_config
    Part of the framework responsible for supplying configuration options
    used by different parts of the framework.
    Note, however, that it's best to declare any configuration options for
    the modules within themselves.
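
    Most options below follow a single pattern: prefer the value from the
    global Invenio configuration when it is importable, and fall back to a
    local default otherwise. A minimal sketch of that pattern (AID_ENABLED
    is one of the options defined further down in this file):

        if GLOBAL_CONFIG:
            AID_ENABLED = CFG_BIBAUTHORID_ENABLED
        else:
            AID_ENABLED = True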
""" import sys import os.path as osp try: from invenio.access_control_config import SUPERADMINROLE except ImportError: SUPERADMINROLE = "Superadmin" GLOBAL_CONFIG = True try: from invenio.config import CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS, \ CFG_BIBAUTHORID_MAX_PROCESSES, \ CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY, \ CFG_BIBAUTHORID_ENABLED, \ CFG_BIBAUTHORID_ON_AUTHORPAGES, \ CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE, \ CFG_INSPIRE_SITE, \ CFG_ADS_SITE except ImportError: GLOBAL_CONFIG = False # Current version of the framework VERSION = '1.1.2' # make sure current directory is importable FILE_PATH = osp.dirname(osp.abspath(__file__)) if FILE_PATH not in sys.path: sys.path.insert(0, FILE_PATH) # Permission definitions as in actions defined in roles CLAIMPAPER_ADMIN_ROLE = "claimpaperoperators" CLAIMPAPER_USER_ROLE = "claimpaperusers" CMP_ENABLED_ROLE = "paperclaimviewers" CHP_ENABLED_ROLE = "paperattributionviewers" AID_LINKS_ROLE = "paperattributionlinkviewers" CLAIMPAPER_VIEW_PID_UNIVERSE = 'claimpaper_view_pid_universe' CLAIMPAPER_CHANGE_OWN_DATA = 'claimpaper_change_own_data' CLAIMPAPER_CHANGE_OTHERS_DATA = 'claimpaper_change_others_data' CLAIMPAPER_CLAIM_OWN_PAPERS = 'claimpaper_claim_own_papers' CLAIMPAPER_CLAIM_OTHERS_PAPERS = 'claimpaper_claim_others_papers' #Number of persons in a search result for which the first five papers will be shown PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT = 10 CMPROLESLCUL = {'guest': 0, CLAIMPAPER_USER_ROLE: 25, CLAIMPAPER_ADMIN_ROLE: 50, SUPERADMINROLE: 50} # Globally enable AuthorID Interfaces. # If False: No guest, user or operator will have access to the system. if GLOBAL_CONFIG: AID_ENABLED = CFG_BIBAUTHORID_ENABLED else: AID_ENABLED = True # Enable AuthorID information on the author pages. if GLOBAL_CONFIG: AID_ON_AUTHORPAGES = CFG_BIBAUTHORID_ON_AUTHORPAGES else: AID_ON_AUTHORPAGES = True # Limit the disambiguation to a specific collections. Leave empty for all # Collections are to be defined as a list of strings # Special for ADS: Focus on ASTRONOMY collection if GLOBAL_CONFIG and CFG_ADS_SITE: LIMIT_TO_COLLECTIONS = ["ASTRONOMY"] else: LIMIT_TO_COLLECTIONS = [] # Exclude documents that are visible in a collection mentioned here: EXCLUDE_COLLECTIONS = ["HEPNAMES", "INST", "Deleted", "DELETED", "deleted"] # User info keys for externally claimed records # e.g. for arXiv SSO: ["external_arxivids"] if GLOBAL_CONFIG and CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY: EXTERNAL_CLAIMED_RECORDS_KEY = CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY else: EXTERNAL_CLAIMED_RECORDS_KEY = [] # Lists all filters that are valid to filter the export by. 
# An example is 'arxiv' to return only papers with a 037 entry named arxiv
VALID_EXPORT_FILTERS = ["arxiv"]

# Max number of threads to parallelize sql queries in table_utils updates
if GLOBAL_CONFIG and CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS:
    PERSONID_SQL_MAX_THREADS = CFG_BIBAUTHORID_PERSONID_SQL_MAX_THREADS
else:
    PERSONID_SQL_MAX_THREADS = 12

# Max number of processes spawned by the disambiguation algorithm
if GLOBAL_CONFIG and CFG_BIBAUTHORID_MAX_PROCESSES:
    BIBAUTHORID_MAX_PROCESSES = CFG_BIBAUTHORID_MAX_PROCESSES
else:
    BIBAUTHORID_MAX_PROCESSES = 12

WEDGE_THRESHOLD = 0.8

+# Rabbit: use or ignore external ids
+RABBIT_USE_EXTERNAL_IDS = True
+
+# Collect and use in rabbit the external id INSPIREID
+COLLECT_EXTERNAL_ID_INSPIREID = CFG_INSPIRE_SITE
+RABBIT_USE_EXTERNAL_ID_INSPIREID = CFG_INSPIRE_SITE
+
+# Force rabbit to cache entire marc tables instead of querying the db
+# if dealing with more than this threshold of papers
+RABBIT_USE_CACHED_GET_GROUPED_RECORDS_THRESHOLD = 500
+
+
+# Cache the personid table for performing exact name searches?
+RABBIT_USE_CACHED_PID_FOR_EXACT_NAME_SEARCH = True
+
+# Collect external ids from, and store them as person attributes of, only manually claimed papers?
+LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS = False
+
+
# BibAuthorID debugging options
# This flag triggers most of the output.
DEBUG_OUTPUT = False
+# Print timestamps
+DEBUG_TIMESTAMPS = False
+# Print timestamps even in update_status
+DEBUG_TIMESTAMPS_UPDATE_STATUS = False

# The following options trigger the output for parts of
# bibauthorid which normally generate too much output
DEBUG_NAME_COMPARISON_OUTPUT = False
DEBUG_METADATA_COMPARISON_OUTPUT = False
DEBUG_WEDGE_OUTPUT = False
DEBUG_PROCESS_PEAK_MEMORY = True

# Keep in mind that you might use an assert instead of this option.
# Use DEBUG_CHECKS to guard heavy computations in order to make
# their use explicit.
DEBUG_CHECKS = False

+TORTOISE_FILES_PATH = '/opt/tortoise_cache/'
+
## force skip ui arxiv stub page (specific for inspire)
BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = True
if GLOBAL_CONFIG and CFG_INSPIRE_SITE:
    BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = CFG_BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE
else:
    BIBAUTHORID_UI_SKIP_ARXIV_STUB_PAGE = True

## URL for the remote INSPIRE login that shall be shown on (arXiv stub page.)
BIBAUTHORID_CFG_INSPIRE_LOGIN = ""
if GLOBAL_CONFIG and CFG_INSPIRE_SITE:
    BIBAUTHORID_CFG_INSPIRE_LOGIN = 'https://arxiv.org/inspire_login'

# Shall we send from a locally defined email address or from the user's one
# when we send out a ticket? Default is True -> send with the user's email
TICKET_SENDING_FROM_USER_EMAIL = True

# Regexp for the names separation
NAMES_SEPARATOR_CHARACTER_LIST = ",;.=\-\(\)"
SURNAMES_SEPARATOR_CHARACTER_LIST = ",;"
diff --git a/modules/bibauthorid/lib/bibauthorid_daemon.py b/modules/bibauthorid/lib/bibauthorid_daemon.py
index 3e1c5038b..b9ed1655c 100644
--- a/modules/bibauthorid/lib/bibauthorid_daemon.py
+++ b/modules/bibauthorid/lib/bibauthorid_daemon.py
@@ -1,278 +1,289 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
Bibauthorid Daemon
    This module IS NOT standalone safe - it should never be run this way.
"""

import sys
import bibauthorid_config as bconfig
import bibtask

-from bibauthorid_backinterface import get_papers_recently_modified
+from bibauthorid_backinterface import get_recently_modified_record_ids
from bibauthorid_backinterface import get_user_log
from bibauthorid_backinterface import insert_user_log
from bibauthorid_backinterface import get_sql_time

try:
    any([True])
except:
    def any(x):
        for element in x:
            if element:
                return True
        return False


def bibauthorid_daemon():
    """Constructs the Bibauthorid bibtask."""
    bibtask.task_init(authorization_action='runbibclassify',
        authorization_msg="Bibauthorid Task Submission",
        description="""
Purpose:
  Disambiguate Authors and find their identities.
Examples:
  - Process all records that hold an author with last name 'Ellis':
      $ bibauthorid -u admin --update-personid --all-records
  - Disambiguate all records on a fresh installation
      $ bibauthorid -u admin --disambiguate --from-scratch
""",
        help_specific_usage="""
  bibauthorid [COMMAND] [OPTIONS]

  COMMAND
    You can choose only one from the following:
      --update-personid     Updates personid adding not yet assigned papers
                            to the system, on a fast, best-effort basis.
                            Cleans the table from stale records.

      --disambiguate        Disambiguates all signatures in the database
                            using the tortoise/wedge algorithm. This usually
                            takes a LOT of time so the results are stored in
                            a special table. Use --merge to use the results.

      --merge               Updates the personid tables with the results
                            from the --disambiguate algorithm.

  OPTIONS
    Options for update personid
        (default)           Will update only the modified records since the
                            last run.

        -i, --record-ids    Force the procedure to work only on the
                            specified records. This option is exclusive
                            with --all-records.

        --all-records       Force the procedure to work on all records.
                            This option is exclusive with --record-ids.

    Options for disambiguate
        (default)           Performs full disambiguation of all records in
                            the current personid tables with respect to the
                            user decisions.

        --from-scratch      Ignores the current information in the personid
                            tables and disambiguates everything from
                            scratch.

    There are no options for the merger.
""",
        version="Invenio Bibauthorid v%s" % bconfig.VERSION,
        specific_params=("i:",
            [
                "record-ids=",
                "disambiguate",
                "merge",
                "all-records",
                "update-personid",
                "from-scratch"
            ]),
        task_submit_elaborate_specific_parameter_fnc=_task_submit_elaborate_specific_parameter,
        task_submit_check_options_fnc=_task_submit_check_options,
        task_run_fnc=_task_run_core)


def _task_submit_elaborate_specific_parameter(key, value, opts, args):
    """
    Given the string key it checks its meaning, eventually using the
    value. Usually, it fills some key in the options dict.
    It must return True if it has elaborated the key, False,
    if it doesn't know that key.
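
    For example, a call with key "--from-scratch" sets the "from_scratch"
    task option and returns True, while an unrecognized key makes the
    function return False so that bibtask can report the unknown option.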
""" if key in ("--update-personid",): bibtask.task_set_option("update_personid", True) elif key in ("--record-ids", '-i'): if value.count("="): value = value[1:] value = value.split(",") bibtask.task_set_option("record_ids", value) elif key in ("--all-records",): bibtask.task_set_option("all_records", True) elif key in ("--disambiguate",): bibtask.task_set_option("disambiguate", True) elif key in ("--merge",): bibtask.task_set_option("merge", True) elif key in ("--from-scratch",): bibtask.task_set_option("from_scratch", True) else: return False return True def _task_run_core(): """ Runs the requested task in the bibsched environment. """ if bibtask.task_get_option('update_personid'): record_ids = bibtask.task_get_option('record_ids') if record_ids: record_ids = map(int, record_ids) all_records = bibtask.task_get_option('all_records') bibtask.task_update_progress('Updating personid...') run_rabbit(record_ids, all_records) bibtask.task_update_progress('PersonID update finished!') if bibtask.task_get_option("disambiguate"): bibtask.task_update_progress('Performing full disambiguation...') run_tortoise(bool(bibtask.task_get_option("from_scratch"))) bibtask.task_update_progress('Full disambiguation finished!') if bibtask.task_get_option("merge"): bibtask.task_update_progress('Merging results...') run_merge() bibtask.task_update_progress('Merging finished!') return 1 def _task_submit_check_options(): """ Required by bibtask. Checks the options. """ update_personid = bibtask.task_get_option("update_personid") disambiguate = bibtask.task_get_option("disambiguate") merge = bibtask.task_get_option("merge") record_ids = bibtask.task_get_option("record_ids") all_records = bibtask.task_get_option("all_records") from_scratch = bibtask.task_get_option("from_scratch") commands = bool(update_personid) + bool(disambiguate) + bool(merge) if commands == 0: bibtask.write_message("ERROR: At least one command should be specified!" , stream=sys.stdout, verbose=0) return False if commands > 1: bibtask.write_message("ERROR: The options --update-personid, --disambiguate " "and --merge are mutually exclusive." , stream=sys.stdout, verbose=0) return False assert commands == 1 if update_personid: if any((from_scratch,)): bibtask.write_message("ERROR: The only options which can be specified " "with --update-personid are --record-ids and " "--all-records" , stream=sys.stdout, verbose=0) return False options = bool(record_ids) + bool(all_records) if options > 1: bibtask.write_message("ERROR: conflicting options: --record-ids and " "--all-records are mutually exclusive." , stream=sys.stdout, verbose=0) return False if record_ids: for iden in record_ids: if not iden.isdigit(): bibtask.write_message("ERROR: Record_ids expects numbers. " "Provided: %s." 
% iden) return False if disambiguate: if any((record_ids, all_records)): bibtask.write_message("ERROR: The only option which can be specified " "with --disambiguate is from-scratch" , stream=sys.stdout, verbose=0) return False if merge: if any((record_ids, all_records, from_scratch)): bibtask.write_message("ERROR: There are no options which can be " "specified along with --merge" , stream=sys.stdout, verbose=0) return False return True def rabbit_with_log(papers, check_invalid_papers, log_comment, partial=False): from bibauthorid_rabbit import rabbit starting_time = get_sql_time() rabbit(papers, check_invalid_papers) if partial: action = 'PID_UPDATE_PARTIAL' else: action = 'PID_UPDATE' insert_user_log('daemon', '-1', action, 'bibsched', 'status', comment=log_comment, timestamp=starting_time) + def run_rabbit(paperslist, all_records=False): if not paperslist and all_records: rabbit_with_log(None, True, 'bibauthorid_daemon, update_personid on all papers') elif not paperslist: last_log = get_user_log(userinfo='daemon', action='PID_UPDATE', only_most_recent=True) if len(last_log) >= 1: #select only the most recent papers - recently_modified = get_papers_recently_modified(date=last_log[0][2]) + recently_modified = get_recently_modified_record_ids(date=last_log[0][2]) if not recently_modified: bibtask.write_message("update_personID_table_from_paper: " "All person entities up to date.", stream=sys.stdout, verbose=0) else: bibtask.write_message("update_personID_table_from_paper: Running on: " + str(recently_modified), stream=sys.stdout, verbose=0) rabbit_with_log(recently_modified, True, 'bibauthorid_daemon, run_personid_fast_assign_papers on ' + str([paperslist, all_records, recently_modified])) else: rabbit_with_log(None, True, 'bibauthorid_daemon, update_personid on all papers') else: rabbit_with_log(paperslist, True, 'bibauthorid_daemon, personid_fast_assign_papers on ' + str(paperslist), partial=True) def run_tortoise(from_scratch): from bibauthorid_tortoise import tortoise, tortoise_from_scratch + if from_scratch: tortoise_from_scratch() else: - tortoise() + start_time = get_sql_time() + tortoise_db_name = 'tortoise' + + last_run = get_user_log(userinfo=tortoise_db_name, only_most_recent=True) + if last_run: + modified = get_recently_modified_record_ids(last_run[0][2]) + else: + modified = [] + tortoise(modified) + + insert_user_log(tortoise_db_name, '-1', '', '', '', timestamp=start_time) def run_merge(): from bibauthorid_merge import merge merge() - diff --git a/modules/bibauthorid/lib/bibauthorid_dbinterface.py b/modules/bibauthorid/lib/bibauthorid_dbinterface.py index c9464214f..24c92e9bc 100644 --- a/modules/bibauthorid/lib/bibauthorid_dbinterface.py +++ b/modules/bibauthorid/lib/bibauthorid_dbinterface.py @@ -1,2242 +1,2528 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
'''
    bibauthorid_dbinterface
    This is the only file in bibauthorid which should use the database.
    It should offer an interface for all other files in the module.
'''
import bibauthorid_config as bconfig
-import sys
import numpy
import cPickle
-import zlib
+from cPickle import UnpicklingError
+import os
+import gc
from itertools import groupby, count, ifilter, chain, imap
from operator import itemgetter
-from invenio.access_control_engine import acc_authorize_action
+
+from search_engine import perform_request_search
+from access_control_engine import acc_authorize_action
from bibauthorid_name_utils import split_name_parts
from bibauthorid_name_utils import create_canonical_name
from bibauthorid_name_utils import create_normalized_name
from bibauthorid_general_utils import bibauthor_print
from bibauthorid_general_utils import update_status \
    , update_status_final
-from dbquery import run_sql \
-    , OperationalError \
-    , ProgrammingError
+from dbquery import run_sql
+
+MARC_100_700_CACHE = None
+
+COLLECT_INSPIRE_ID = bconfig.COLLECT_EXTERNAL_ID_INSPIREID

def get_sql_time():
    '''
    Returns the time according to the database. The type is datetime.datetime.
    '''
    return run_sql("select now()")[0][0]

def set_personid_row(person_id, tag, value, opt1=0, opt2=0, opt3=""):
    '''
    Inserts data and the additional options of a person by a given personid and tag.
    '''
    run_sql("INSERT INTO aidPERSONIDDATA "
            "(`personid`, `tag`, `data`, `opt1`, `opt2`, `opt3`) "
            "VALUES (%s, %s, %s, %s, %s, %s)",
            (person_id, tag, value, opt1, opt2, opt3))

def get_personid_row(person_id, tag):
    '''
    Returns all the records associated to a person and a tag.
    @param person_id: id of the person to read the attribute from
    @type person_id: int
    @param tag: the tag to read.
    @type tag: string
    @return: the data associated with a virtual author
    @rtype: tuple of tuples
    '''
    return run_sql("SELECT data, opt1, opt2, opt3 "
                   "FROM aidPERSONIDDATA "
                   "WHERE personid = %s AND tag = %s",
                   (person_id, tag))

def del_personid_row(tag, person_id=None, value=None):
    '''
    Deletes the rows matching the given tag, optionally restricted to a
    given person and/or value.
    @param person_id: ID of the person
    @type person_id: int
    @param tag: tag to be deleted
    @type tag: string
    @param value: value to be deleted for the tag
    @type value: string
    '''
    if person_id:
        if value:
            run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s and data=%s", (person_id, tag, value,))
        else:
            run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s", (person_id, tag,))
    else:
        if value:
            run_sql("delete from aidPERSONIDDATA where tag=%s and data=%s", (tag, value,))
        else:
            run_sql("delete from aidPERSONIDDATA where tag=%s", (tag,))

def get_all_papers_of_pids(personid_list):
    '''
    Gets all papers of the authors in a given list and sorts the results
    by bibrefrec.
    @param personid_list: list with the authors.
    @type personid_list: iterable of integers.
    '''
    if personid_list:
-        plist = list_2_SQL_str(personid_list, lambda x: str(x))
+        plist = list_2_SQL_str(personid_list)
        paps = run_sql("select personid, bibref_table, bibref_value, bibrec, flag "
                       "from aidPERSONIDPAPERS "
                       "where personid in %s " % plist)
        inner = set(row[1:4] for row in paps if row[4] > -2)
        return (x for x in paps if x[1:4] in inner)
    return ()

def del_person_not_manually_claimed_papers(pid):
    '''
    Deletes papers from a person which have not been manually claimed.
    '''
    run_sql("delete from aidPERSONIDPAPERS "
            "where (flag <> '-2' and flag <> '2') and personid=%s", (pid,))

def get_personid_from_uid(uid):
    '''
    Returns the personID associated with the provided uid. The second element
    of the returned tuple is True if the personID is already associated with
    the user, False otherwise.
    @param uid: userID
    @type uid: ((int,),)
    '''
    pid = run_sql("select personid from aidPERSONIDDATA where tag=%s and data=%s", ('uid', str(uid[0][0])))
    if len(pid) == 1:
        return (pid[0], True)
    else:
        return ([-1], False)

def get_uid_from_personid(pid):
    uid = run_sql("select data from aidPERSONIDDATA where tag='uid' and personid = %s", (pid,))
    if uid:
        return uid[0][0]
    else:
        return None

def get_new_personid():
    pids = (run_sql("select max(personid) from aidPERSONIDDATA")[0][0],
            run_sql("select max(personid) from aidPERSONIDPAPERS")[0][0])
    pids = tuple(int(p) for p in pids if p != None)
    if len(pids) == 2:
        return max(*pids) + 1
    elif len(pids) == 1:
        return pids[0] + 1
    else:
        return 0

def get_existing_personids():
    try:
        pids_data = set(zip(*run_sql("select distinct personid from aidPERSONIDDATA"))[0])
    except IndexError:
        pids_data = set()
    try:
        pids_pap = set(zip(*run_sql("select distinct personid from aidPERSONIDPAPERS"))[0])
    except IndexError:
        pids_pap = set()
    return pids_data | pids_pap

def get_existing_result_clusters():
    return run_sql("select distinct personid from aidRESULTS")

def create_new_person(uid= -1, uid_is_owner=False):
    '''
    Create a new person. Set the uid as owner if requested.
    '''
    pid = get_new_personid()
    if uid_is_owner:
        set_personid_row(pid, 'uid', str(uid))
    else:
        set_personid_row(pid, 'user-created', str(uid))
    return pid

def create_new_person_from_uid(uid):
    return create_new_person(uid, uid_is_owner=True)

def new_person_from_signature(sig, name=None):
    '''
    Creates a new person from a signature.
    '''
    pid = get_new_personid()
    add_signature(sig, name, pid)
    return pid

def add_signature(sig, name, pid):
    '''
    Inserts a signature in personid.
    '''
    if not name:
        name = get_name_by_bibrecref(sig)
    name = create_normalized_name(split_name_parts(name))
    run_sql("INSERT INTO aidPERSONIDPAPERS "
            "(personid, bibref_table, bibref_value, bibrec, name) "
            "VALUES (%s, %s, %s, %s, %s)"
            , (pid, str(sig[0]), sig[1], sig[2], name))

def move_signature(sig, pid):
    '''
    Moves a signature to the given person, unless it has been manually
    claimed or rejected.
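    For example (illustrative values only), move_signature(('100', 312, 1678), 42)
    would reattach the signature '100:312,1678' to person 42, provided its flag
    is neither 2 (claimed) nor -2 (rejected).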
''' run_sql("update aidPERSONIDPAPERS set personid=%s " - "where bibref_table=%s and bibref_value=%s " + "where bibref_table like %s and bibref_value=%s " "and bibrec=%s and flag <> 2 and flag <> -2", (pid,) + sig) def find_conflicts(sig, pid): """ """ return run_sql("select bibref_table, bibref_value, bibrec, flag " "from aidPERSONIDPAPERS where " "personid = %s and " "bibrec = %s and " "flag <> -2" , (pid, sig[2])) def update_request_ticket(person_id, tag_data_tuple, ticket_id=None): ''' Creates / updates a request ticket for a personID @param: personid int @param: tag_data_tuples 'image' of the ticket: (('paper', '700:316,10'), ('owner', 'admin'), ('external_id', 'ticket_18')) @return: ticketid ''' #tags: rt_owner (the owner of the ticket, associating the rt_number to the transaction) # rt_external_id # rt_paper_cornfirm, rt_paper_reject, rt_paper_forget, rt_name, rt_email, rt_whatever #flag: rt_number if not ticket_id: last_id = run_sql("select max(opt1) from aidPERSONIDDATA where personid=%s and tag like %s", (str(person_id), 'rt_%'))[0][0] if last_id: ticket_id = last_id + 1 else: ticket_id = 1 else: delete_request_ticket(person_id, ticket_id) for d in tag_data_tuple: run_sql("insert into aidPERSONIDDATA (personid, tag, data, opt1) " "values (%s,%s,%s,%s)", (str(person_id), 'rt_' + str(d[0]), str(d[1]), str(ticket_id))) return ticket_id def delete_request_ticket(person_id, ticket_id=None): ''' Removes a ticket from a person_id. If ticket_id is not provider removes all the tickets pending on a person. ''' if ticket_id: run_sql("delete from aidPERSONIDDATA where personid=%s and tag like %s and opt1 =%s", (str(person_id), 'rt_%', str(ticket_id))) else: run_sql("delete from aidPERSONIDDATA where personid=%s and tag like %s", (str(person_id), 'rt_%')) def get_all_personids_by_name(regexpr): return run_sql("select personid, name " "from aidPERSONIDPAPERS " "where name like %s", (regexpr,)) def get_personids_by_canonical_name(target): pid = run_sql("select personid from aidPERSONIDDATA where " "tag='canonical_name' and data like %s", (target,)) if pid: return run_sql("select personid, name from aidPERSONIDPAPERS " "where personid=%s", (pid[0][0],)) else: return [] def get_bibref_modification_status(bibref): ''' Determines if a record attached to a person has been touched by a human by checking the flag. @param pid: The Person ID of the person to check the assignment from @type pid: int @param bibref: The paper identifier to be checked (e.g. "100:12,144") @type bibref: string returns [bool:human_modified, int:lcul] ''' if not bibref: raise ValueError("A bibref is expected!") head, rec = bibref.split(',') table, ref = head.split(':') flags = run_sql("SELECT flag, lcul FROM aidPERSONIDPAPERS WHERE " "bibref_table = %s and bibref_value = %s and bibrec = %s" , (table, ref, rec)) if flags: return flags[0] else: return (False, 0) def get_canonical_id_from_personid(pid): ''' Finds the person id canonical name (e.g. 
Ellis_J_R_1) @param pid @type int @return: sql result of the request @rtype: tuple of tuple ''' return run_sql("SELECT data FROM aidPERSONIDDATA WHERE " "tag = %s AND personid = %s", ('canonical_name', str(pid))) def get_papers_status(paper): ''' Gets the personID and flag assiciated to papers @param papers: list of papers @type papers: '100:7531,9024' @return: (('data','personID','flag',),) @rtype: tuple of tuples ''' head, bibrec = paper.split(',') _table, bibref = head.split(':') rets = run_sql("select PersonID, flag " "from aidPERSONIDPAPERS " "where bibref_table = %s " "and bibref_value = %s " "and bibrec = %s" % (head, bibrec, bibref)) return [[paper] + list(x) for x in rets] def get_persons_from_recids(recids, return_alt_names=False, return_all_person_papers=False): rec_2_pid = dict() pid_2_data = dict() all_pids = set() def get_canonical_name(pid): return run_sql("SELECT data " "FROM aidPERSONIDDATA " "WHERE tag = %s " "AND personid = %s", ('canonical_name', pid)) for rec in recids: pids = run_sql("SELECT personid " "FROM aidPERSONIDPAPERS " "WHERE bibrec = %s " " and flag > -2 ", (rec,)) # for some reason python's set is faster than a mysql distinct pids = set(p[0] for p in pids) all_pids |= pids rec_2_pid[rec] = list(pids) for pid in all_pids: pid_data = {} canonical = get_canonical_name(pid) #We can supposed that this person didn't have a chance to get a canonical name yet #because it was not fully processed by it's creator. Anyway it's safe to try to create one #before failing miserably if not canonical: update_personID_canonical_names([pid]) canonical = get_canonical_name(pid) #assert len(canonical) == 1 #This condition cannot hold in case claims or update daemons are run in parallel #with this, as it can happen that a person with papers exists for wich a canonical name #has not been computed yet. Hence, it will be indexed next time, so it learns. #Each person should have at most one canonical name, so: assert len(canonical) <= 1 if len(canonical) == 1: pid_data = {'canonical_id' : canonical[0][0]} if return_alt_names: names = run_sql("SELECT name " "FROM aidPERSONIDPAPERS " "WHERE personid = %s " " and flag > -2 ", (pid,)) names = set(n[0] for n in names) pid_data['alternatative_names'] = list(names) if return_all_person_papers: recs = run_sql("SELECT bibrec " "FROM aidPERSONIDPAPERS " "WHERE personid = %s " " and flag > -2 ", (pid,)) recs = set(r[0] for r in recs) pid_data['person_records'] = list(recs) pid_2_data[pid] = pid_data return (rec_2_pid, pid_2_data) def get_person_db_names_count(pid, sort_by_count=True): ''' Returns the set of name strings and count associated to a person id. The name strings are as found in the database. 
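    For example (made-up values), get_person_db_names_count(2) could return
    [('Ellis, J.', 7), ('Ellis, John', 2)], with the most frequent variant
    first while sort_by_count is True.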
@param pid: ID of the person @type pid: ('2',) ''' id_2_count = run_sql("select bibref_table, bibref_value " "from aidPERSONIDPAPERS " "where personid = %s " "and flag > -2", (pid,)) ref100 = [refid[1] for refid in id_2_count if refid[0] == '100'] ref700 = [refid[1] for refid in id_2_count if refid[0] == '700'] ref100_count = dict((key, len(list(data))) for key, data in groupby(sorted(ref100))) ref700_count = dict((key, len(list(data))) for key, data in groupby(sorted(ref700))) if ref100: ref100_s = list_2_SQL_str(ref100, str) id100_2_str = run_sql("select id, value " "from bib10x " "where id in %s" % ref100_s) else: id100_2_str = tuple() if ref700: ref700_s = list_2_SQL_str(ref700, str) id700_2_str = run_sql("select id, value " "from bib70x " "where id in %s" % ref700_s) else: id700_2_str = tuple() ret100 = [(name, ref100_count[refid]) for refid, name in id100_2_str] ret700 = [(name, ref700_count[refid]) for refid, name in id700_2_str] ret = ret100 + ret700 if sort_by_count: ret = sorted(ret, key=itemgetter(1), reverse=True) return ret def get_person_id_from_canonical_id(canonical_id): ''' Finds the person id from a canonical name (e.g. Ellis_J_R_1) @param canonical_id: the canonical ID @type canonical_id: string @return: sql result of the request @rtype: tuple of tuple ''' return run_sql("SELECT personid FROM aidPERSONIDDATA WHERE " "tag='canonical_name' AND data = %s", (canonical_id,)) def get_person_names_count(pid): ''' Returns the set of name strings and count associated to a person id @param pid: ID of the person @type pid: ('2',) @param value: value to be written for the tag @type value: string ''' return run_sql("select name, count(name) from aidPERSONIDPAPERS where " "personid=%s and flag > -2 group by name", (pid,)) def get_person_db_names_set(pid): ''' Returns the set of db_name strings associated to a person id @param pid: ID of the person @type pid: 2 ''' names = get_person_db_names_count(pid) if names: return zip(set(zip(*names)[0])) else: return [] def get_personids_from_bibrec(bibrec): ''' Returns all the personids associated to a bibrec. 
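    For example (illustrative), get_personids_from_bibrec(1678) yields a tuple
    such as (3, 17) when two non-rejected persons are attached to record 1678,
    and [] when there are none.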
''' pids = run_sql("select distinct personid from aidPERSONIDPAPERS where bibrec=%s and flag > -2", (bibrec,)) if pids: return zip(*pids)[0] else: return [] def get_personids_and_papers_from_bibrecs(bibrecs, limit_by_name=None): ''' ''' if not bibrecs: return [] else: bibrecs = list_2_SQL_str(bibrecs) if limit_by_name: try: surname = split_name_parts(limit_by_name)[0] except IndexError: surname = None else: surname = None if not surname: data = run_sql("select personid,bibrec from aidPERSONIDPAPERS where bibrec in %s" % (bibrecs,)) else: surname = split_name_parts(limit_by_name)[0] data = run_sql(("select personid,bibrec from aidPERSONIDPAPERS where bibrec in %s " "and name like " % bibrecs) + ' %s ', (surname + '%',)) pidlist = [(k, set([s[1] for s in d])) for k, d in groupby(sorted(data, key=lambda x:x[0]), key=lambda x:x[0])] pidlist = sorted(pidlist, key=lambda x:len(x[1]), reverse=True) return pidlist def get_person_bibrecs(pid): ''' Returns bibrecs associated with a personid @param pid: integer personid @return [bibrec1,...,bibrecN] ''' papers = run_sql("select bibrec from aidPERSONIDPAPERS where personid=%s", (str(pid),)) if papers: return list(set(zip(*papers)[0])) else: return [] def get_person_papers(pid, flag, show_author_name=False, show_title=False, show_rt_status=False, show_affiliations=False, show_date=False, show_experiment=False): query = "bibref_table, bibref_value, bibrec, flag" if show_author_name: query += ", name" all_papers = run_sql("SELECT " + query + " " "FROM aidPERSONIDPAPERS " "WHERE personid = %s " "AND flag >= %s", (pid, flag)) def format_paper(paper): bibrefrec = "%s:%d,%d" % paper[:3] ret = {'data' : bibrefrec, 'flag' : paper[3] } if show_author_name: ret['authorname'] = paper[4] if show_title: ret['title'] = "" title = get_title_from_rec(paper[2]) if title: - ret['title'] = (title, ) + ret['title'] = (title,) if show_rt_status: rt_count = run_sql("SELECT count(personid) " "FROM aidPERSONIDDATA WHERE " "tag like 'rt_%%' and data = %s" , (bibrefrec,)) ret['rt_status'] = (rt_count[0][0] > 0) if show_affiliations: tag = '%s__u' % paper[0] ret['affiliation'] = get_grouped_records(paper[:3], tag)[tag] if show_date: ret['date'] = [] date_id = run_sql("SELECT id_bibxxx " "FROM bibrec_bib26x " "WHERE id_bibrec = %s " , (paper[2],)) if date_id: date_id_s = list_2_SQL_str(date_id, lambda x: x[0]) date = run_sql("SELECT value " "FROM bib26x " "WHERE id in %s " "AND tag = %s" % (date_id_s, "'269__c'")) if date: ret['date'] = zip(*date)[0] if show_experiment: ret['experiment'] = [] experiment_id = run_sql("SELECT id_bibxxx " "FROM bibrec_bib69x " "WHERE id_bibrec = %s " , (paper[2],)) if experiment_id: experiment_id_s = list_2_SQL_str(experiment_id, lambda x: x[0]) experiment = run_sql("SELECT value " "FROM bib69x " "WHERE id in %s " "AND tag = %s" % (experiment_id_s, "'693__e'")) if experiment: ret['experiment'] = zip(*experiment)[0] return ret return [format_paper(paper) for paper in all_papers] def get_persons_with_open_tickets_list(): ''' Finds all the persons with open tickets and returns pids and count of tickets @return: [[pid, ticket_count]] ''' return run_sql("select personid, count(distinct opt1) from " "aidPERSONIDDATA where tag like 'rt_%' group by personid") def get_request_ticket(person_id, ticket_id=None): ''' Retrieves one or many requests tickets from a person @param: person_id: person id integer @param: matching: couple of values to match ('tag', 'value') @param: ticket_id: ticket id (flag) value @returns: [[[('tag', 'value')], ticket_id]] [[[('a', 
'va'), ('b', 'vb')], 1L], [[('b', 'daOEIaoe'), ('a', 'caaoOUIe')], 2L]] ''' if ticket_id: tstr = " and opt1='%s' " % ticket_id else: tstr = " " tickets = run_sql("select tag,data,opt1 from aidPERSONIDDATA where personid=%s and " " tag like 'rt_%%' " + tstr , (person_id,)) return [[[(s[0][3:], s[1]) for s in d], k] for k, d in groupby(sorted(tickets, key=lambda k: k[2]), key=lambda k: k[2])] -def insert_user_log(userinfo, personid, action, tag, value, comment='', transactionid=0, timestamp=None): +def insert_user_log(userinfo, personid, action, tag, value, comment='', transactionid=0, timestamp=None, userid=''): ''' Instert log entries in the user log table. For example of entres look at the table generation script. @param userinfo: username or user identifier @type: string @param personid: personid involved in the transaction @type: longint @param action: action type @type: string @param tag: tag @type: string @param value: value for the transaction @type: string @param comment: optional comment for the transaction @type: string @param transactionid: optional id for the transaction @type: longint @return: the transactionid @rtype: longint ''' -# if transactionid == 0: -# transactionid = max(run_sql('SELECT MAX(transactionid) FROM `aidUSERINPUTLOG`')[0][0], -1) + 1 - if not timestamp: timestamp = run_sql('select now()')[0][0] -# run_sql('insert into aidUSERINPUTLOG (transactionid,timestamp,userinfo,personid,action,tag,value,comment) values ' -# '(%(transactionid)s,%(timestamp)s,%(userinfo)s,%(personid)s,%(action)s,%(tag)s,%(value)s,%(comment)s)', -# ({'transactionid':str(transactionid), -# 'timestamp':timestamp.timestamp, -# 'userinfo':str(userinfo), -# 'personid':str(personid), -# 'action':str(action), -# 'tag':str(tag), -# 'value':str(value), -# 'comment':str(comment)})) run_sql('insert into aidUSERINPUTLOG ' - '(transactionid,timestamp,userinfo,personid,action,tag,value,comment) values ' + '(transactionid,timestamp,userinfo,userid,personid,action,tag,value,comment) values ' '(%s,%s,%s,%s,%s,%s,%s,%s)', - (transactionid, timestamp, userinfo, personid, + (transactionid, timestamp, userinfo, userid, personid, action, tag, value, comment)) return transactionid def person_bibref_is_touched_old(pid, bibref): ''' Determines if a record attached to a person has been touched by a human by checking the flag. @param pid: The Person ID of the person to check the assignment from @type pid: int @param bibref: The paper identifier to be checked (e.g. "100:12,144") @type bibref: string ''' bibref, rec = bibref.split(",") table, ref = bibref.split(":") flag = run_sql("SELECT flag " "FROM aidPERSONIDPAPERS " "WHERE personid = %s " "AND bibref_table = %s " "AND bibref_value = %s " "AND bibrec = %s" , (pid, table, ref, rec)) try: flag = flag[0][0] except (IndexError): return False if not flag: return False elif -2 < flag < 2: return False else: return True def confirm_papers_to_person(pid, papers, user_level=0): ''' Confirms the relationship between pid and paper, as from user input. 
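    For example (illustrative values), confirm_papers_to_person((2,),
    (('100:7531,9024',),), user_level=2) claims the signature '100:7531' on
    record 9024 for person 2; a conflicting unclaimed signature already
    attached to person 2 for that record is moved to a freshly created person.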
@param pid: id of the person @type pid: ('2',) @param papers: list of papers to confirm @type papers: (('100:7531,9024',),) @param gather_list: list to store the pids to be updated rather than calling update_personID_names_string_set @typer gather_list: set([('2',), ('3',)]) ''' + + new_pid = get_new_personid() + pids_to_update = set(pid) + + for p in papers: bibref, rec = p[0].split(",") rec = int(rec) table, ref = bibref.split(":") ref = int(ref) + sig = (table, ref, rec) - run_sql("delete from aidPERSONIDPAPERS where personid=%s and bibrec=%s", (pid[0], rec)) - run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and " - " bibref_value = %s and bibrec=%s", - (table, ref, rec)) - - add_signature([table, ref, rec], None, pid[0]) + paps = run_sql("select bibref_table, bibref_value, bibrec " + "from aidPERSONIDPAPERS " + "where personid=%s " + "and bibrec=%s " + "and flag > -2" + , (pid[0], rec)) + rej_paps = run_sql("select bibref_table, bibref_value, bibrec " + "from aidPERSONIDPAPERS " + "where personid=%s " + "and bibrec=%s " + "and flag = -2" + , (pid[0], rec)) + + assert paps or rej_paps + assert len(paps) < 2 + + if paps and sig != paps[0]: + pids_to_update.add(new_pid) + move_signature(paps[0], new_pid) + + run_sql("delete from aidPERSONIDPAPERS where bibref_table like %s and " + " bibref_value = %s and bibrec=%s" + , sig) + add_signature(sig, None, pid[0]) run_sql("update aidPERSONIDPAPERS " "set personid = %s " ", flag = %s " ", lcul = %s " "where bibref_table = %s " "and bibref_value = %s " "and bibrec = %s" - , (str(pid[0]), '2', user_level, + , (pid[0], '2', user_level, table, ref, rec)) - update_personID_canonical_names(pid) + update_personID_canonical_names(pids_to_update) def reject_papers_from_person(pid, papers, user_level=0): ''' Confirms the negative relationship between pid and paper, as from user input. 
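    For example (illustrative values), reject_papers_from_person(2,
    ('100:7531,9024',)) records that person 2 is not the author behind
    '100:7531,9024' and moves that signature to a newly created person.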
@param pid: id of the person @type pid: integer @param papers: list of papers to confirm @type papers: ('100:7531,9024',) ''' new_pid = get_new_personid() + pids_to_update = set([pid]) + for p in papers: brr, rec = p.split(",") table, ref = brr.split(':') sig = (table, ref, rec) records = personid_name_from_signature(sig) assert(records) fpid, name = records[0] - assert fpid == pid + if fpid == pid: + run_sql("INSERT INTO aidPERSONIDPAPERS " + "(personid, bibref_table, bibref_value, bibrec, name, flag, lcul) " + "VALUES (%s, %s, %s, %s, %s, %s, %s)" + , (pid, table, ref, rec, name, -2, user_level)) - run_sql("INSERT INTO aidPERSONIDPAPERS " - "(personid, bibref_table, bibref_value, bibrec, name, flag, lcul) " - "VALUES (%s, %s, %s, %s, %s, %s, %s)" - , (pid, table, ref, rec, name, -2, user_level)) + move_signature(sig, new_pid) + pids_to_update.add(new_pid) - move_signature(sig, new_pid) - update_personID_canonical_names((pid,)) + update_personID_canonical_names(pids_to_update) def reset_papers_flag(pid, papers): ''' Resets the flag associated to the papers to '0' @param papers: list of papers to confirm @type papers: (('100:7531,9024',),) @param gather_list: list to store the pids to be updated rather than calling update_personID_names_string_set @typer gather_list: set([('2',), ('3',)]) ''' for p in papers: bibref, rec = p[0].split(",") table, ref = bibref.split(":") - run_sql("update aidPERSONIDPAPERS " - "set flag = %s, lcul = %s " - "where bibref_table = %s " - "and bibref_value = %s " - "and bibrec = %s" , - ('0', '0', - table, ref, rec)) + sig = (table, ref, rec) + + paps = run_sql("select bibref_table, bibref_value, bibrec " + "from aidPERSONIDPAPERS " + "where personid=%s " + "and bibrec=%s " + , (pid[0], rec)) + rej_paps = run_sql("select bibref_table, bibref_value, bibrec " + "from aidPERSONIDPAPERS " + "where personid=%s " + "and bibrec=%s " + "and flag = -2" + , (pid[0], rec)) + + assert paps or rej_paps + assert len(paps) < 2 + + run_sql("delete from aidPERSONIDPAPERS where bibref_table like %s and " + "bibref_value = %s and bibrec = %s", + (sig)) + add_signature(sig, None, pid[0]) def user_can_modify_data(uid, pid): ''' Return True if the uid can modify data of this personID, false otherwise. @param uid: the user id @type: int @param pid: the person id @type: int @return: can user mofidfy data? 
@rtype: boolean ''' pid_uid = run_sql("select data from aidPERSONIDDATA where tag = %s" " and personid = %s", ('uid', str(pid))) if len(pid_uid) >= 1 and str(uid) == str(pid_uid[0][0]): rights = bconfig.CLAIMPAPER_CHANGE_OWN_DATA else: rights = bconfig.CLAIMPAPER_CHANGE_OTHERS_DATA return acc_authorize_action(uid, rights)[0] == 0 def get_possible_bibrecref(names, bibrec, always_match=False): ''' Returns a list of bibrefs for which the surname is matching @param names: list of names strings @param bibrec: bibrec number @param always_match: match with all the names (full bibrefs list) ''' splitted_names = [split_name_parts(n) for n in names] bibrec_names_100 = run_sql("select o.id, o.value from bib10x o, " "(select i.id_bibxxx as iid from bibrec_bib10x i " "where id_bibrec=%s) as dummy " "where o.tag='100__a' AND o.id = dummy.iid", (str(bibrec),)) bibrec_names_700 = run_sql("select o.id, o.value from bib70x o, " "(select i.id_bibxxx as iid from bibrec_bib70x i " "where id_bibrec=%s) as dummy " "where o.tag='700__a' AND o.id = dummy.iid", (str(bibrec),)) # bibrec_names_100 = run_sql("select id,value from bib10x where tag='100__a' and id in " # "(select id_bibxxx from bibrec_bib10x where id_bibrec=%s)", # (str(bibrec),)) # bibrec_names_700 = run_sql("select id,value from bib70x where tag='700__a' and id in " # "(select id_bibxxx from bibrec_bib70x where id_bibrec=%s)", # (str(bibrec),)) bibreflist = [] for b in bibrec_names_100: spb = split_name_parts(b[1]) for n in splitted_names: if (n[0].lower() == spb[0].lower()) or always_match: if ['100:' + str(b[0]), b[1]] not in bibreflist: bibreflist.append(['100:' + str(b[0]), b[1]]) for b in bibrec_names_700: spb = split_name_parts(b[1]) for n in splitted_names: if (n[0].lower() == spb[0].lower()) or always_match: if ['700:' + str(b[0]), b[1]] not in bibreflist: bibreflist.append(['700:' + str(b[0]), b[1]]) return bibreflist def user_can_modify_paper(uid, paper): ''' Return True if the uid can modify this paper, false otherwise. If the paper is assigned more then one time (from algorithms) consider the most privileged assignment. @param uid: the user id @type: int @param paper: the paper bibref,bibrec pair x00:1234,4321 @type: str @return: can user mofidfy paper attribution? @rtype: boolean ''' bibref, rec = paper.split(",") table, ref = bibref.split(":") prow = run_sql("select personid, lcul from aidPERSONIDPAPERS " "where bibref_table = %s and bibref_value = %s and bibrec = %s " "order by lcul desc limit 0,1", (table, ref, rec)) if len(prow) == 0: return ((acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)[0] == 0) or (acc_authorize_action(uid, bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS)[0] == 0)) min_req_acc_n = int(prow[0][1]) req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS) pid_uid = run_sql("select data from aidPERSONIDDATA where tag = %s and personid = %s", ('uid', str(prow[0][0]))) if len(pid_uid) > 0: if (str(pid_uid[0][0]) != str(uid)) and min_req_acc_n > 0: req_acc = resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS) if min_req_acc_n < req_acc: min_req_acc_n = req_acc min_req_acc = resolve_paper_access_right(min_req_acc_n) return (acc_authorize_action(uid, min_req_acc)[0] == 0) and (resolve_paper_access_right(min_req_acc) >= min_req_acc_n) def resolve_paper_access_right(acc): ''' Given a string or an integer, resolves to the corresponding integer or string If asked for a wrong/not present parameter falls back to the minimum privilege. 
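    For example, resolve_paper_access_right(bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS)
    yields 25, while resolve_paper_access_right(30) resolves to the name of the
    highest right not above 30, i.e. the one mapped to 25.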
''' access_dict = {bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE: 0, bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS: 25, bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS: 50} if isinstance(acc, str): try: return access_dict[acc] except: return 0 inverse_dict = dict([[v, k] for k, v in access_dict.items()]) lower_accs = [a for a in inverse_dict.keys() if a <= acc] try: return inverse_dict[max(lower_accs)] except: return bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE def get_recently_modified_record_ids(date): ''' Returns the bibrecs with modification date more recent then date. @param date: date ''' - return [p[0] for p in run_sql( - "select id from bibrec where modification_date > %s", (date,))] + touched_papers = frozenset(p[0] for p in run_sql( + "select id from bibrec " + "where modification_date > %s" + , (date,))) + return touched_papers & frozenset(get_all_valid_bibrecs()) def filter_modified_record_ids(bibrecs, date): ''' Returns the bibrecs with modification date before the date. @param date: date ''' return ifilter( lambda x: run_sql("select count(*) from bibrec " "where id = %s and " "modification_date < %s" , (x[2], date))[0][0] , bibrecs) def get_cached_author_page(pageparam): ''' Return cached authorpage @param: pageparam (int personid) @return (id, 'authorpage_cache', personid, authorpage_html, date_cached) ''' #TABLE: id, tag, identifier, data, date caches = run_sql("select id, object_name, object_key, object_value, last_updated \ from aidCACHE \ where object_name='authorpage_cache' and object_key=%s", (str(pageparam),)) if len(caches) >= 1: return caches[0] else: return [] def delete_cached_author_page(personid): ''' Deletes from the author page cache the page concerning one person ''' run_sql("delete from aidCACHE where object_name='authorpage_cache' and object_key=%s", (str(personid),)) def update_cached_author_page_timestamp(pageparam): ''' Updates cached author page timestamp @param pageparam: int personid ''' #TABLE: id, tag, identifier, data, date run_sql("update aidCACHE set last_updated=now() where object_name='authorpage_cache' and object_key=%s", (str(pageparam),)) def update_cached_author_page(pageparam, page): ''' Updates cached author page, deleting old caches for same pageparam @param pageparam: int personid @param page: string html authorpage ''' #TABLE: id, tag, identifier, data, date run_sql("delete from aidCACHE where object_name='authorpage_cache' and object_key=%s", (str(pageparam),)) run_sql("insert into aidCACHE values (Null,'authorpage_cache',%s,%s,now())", (str(pageparam), str(page))) -def get_user_log(transactionid='', userinfo='', personID='', action='', tag='', value='', comment='', only_most_recent=False): +def get_user_log(transactionid='', userinfo='', userid='', personID='', action='', tag='', value='', comment='', only_most_recent=False): ''' Get user log table entry matching all the given parameters; all of them are optional. 
    If no parameters are given, returns the complete log table.
    @param transactionid: id of the transaction
    @param userinfo: user name or identifier
    @param personid: id of the person involved
    @param action: action
    @param tag: tag
    @param value: value
    @param comment: comment
    '''
    sql_query = ('select id,transactionid,timestamp,userinfo,personid,action,tag,value,comment ' +
                 'from aidUSERINPUTLOG where 1 ')
    if transactionid:
        sql_query += ' and transactionid=\'' + str(transactionid) + '\''
    if userinfo:
        sql_query += ' and userinfo=\'' + str(userinfo) + '\''
+    if userid:
+        sql_query += ' and userid=\'' + str(userid) + '\''
    if personID:
        sql_query += ' and personid=\'' + str(personID) + '\''
    if action:
        sql_query += ' and action=\'' + str(action) + '\''
    if tag:
        sql_query += ' and tag=\'' + str(tag) + '\''
    if value:
        sql_query += ' and value=\'' + str(value) + '\''
    if comment:
        sql_query += ' and comment=\'' + str(comment) + '\''
    if only_most_recent:
        sql_query += ' order by timestamp desc limit 0,1'
    return run_sql(sql_query)

def list_2_SQL_str(items, f=lambda x: x):
    """
    Concatenates all items in items to a sql string using f.
    @param items: a set of items
    @param type items: X
    @param f: a function which transforms each item from items to string
    @param type f: X:->str
    @return: "(x1, x2, x3, ... xn)" for xi in items
    @return type: string
    """
    strs = (str(f(x)) for x in items)
    return "(%s)" % ", ".join(strs)

-def get_authors_from_paper(paper):
+def _get_authors_from_paper_from_db(paper):
    '''
    Selects all author bibrefs for a given paper.
    '''
    fullbibrefs100 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec=%s", (paper,))
    if len(fullbibrefs100) > 0:
        fullbibrefs100str = list_2_SQL_str(fullbibrefs100, lambda x: str(x[0]))
        return run_sql("select id from bib10x where tag='100__a' and id in %s" % (fullbibrefs100str,))
    return tuple()

-def get_coauthors_from_paper(paper):
+def _get_authors_from_paper_from_cache(paper):
+    '''
+    Selects all author bibrefs for a given paper.
+    '''
+    try:
+        ids = MARC_100_700_CACHE['brb100'][paper]['id'].keys()
+        refs = [i for i in ids if '__a' in MARC_100_700_CACHE['b100'][i][0]]
+    except KeyError:
+        return tuple()
+    return zip(refs)
+
+def get_authors_from_paper(paper):
+    if MARC_100_700_CACHE:
+        if bconfig.DEBUG_CHECKS:
+            # Verify that the cache agrees with the database.
+            assert _get_authors_from_paper_from_cache(paper) == _get_authors_from_paper_from_db(paper)
+        return _get_authors_from_paper_from_cache(paper)
+    else:
+        return _get_authors_from_paper_from_db(paper)
+
+def _get_coauthors_from_paper_from_db(paper):
    '''
    Selects all coauthor bibrefs for a given paper.
    '''
    fullbibrefs700 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec=%s", (paper,))
    if len(fullbibrefs700) > 0:
        fullbibrefs700str = list_2_SQL_str(fullbibrefs700, lambda x: str(x[0]))
        return run_sql("select id from bib70x where tag='700__a' and id in %s" % (fullbibrefs700str,))
    return tuple()

+def _get_coauthors_from_paper_from_cache(paper):
+    '''
+    Selects all coauthor bibrefs for a given paper.
+    '''
+    try:
+        ids = MARC_100_700_CACHE['brb700'][paper]['id'].keys()
+        refs = [i for i in ids if '__a' in MARC_100_700_CACHE['b700'][i][0]]
+    except KeyError:
+        return tuple()
+    return zip(refs)
+
+def get_coauthors_from_paper(paper):
+    if MARC_100_700_CACHE:
+        if bconfig.DEBUG_CHECKS:
+            # Verify that the cache agrees with the database.
+            assert _get_coauthors_from_paper_from_cache(paper) == _get_coauthors_from_paper_from_db(paper)
+        return _get_coauthors_from_paper_from_cache(paper)
+    else:
+        return _get_coauthors_from_paper_from_db(paper)
+
def get_bibrefrec_subset(table, papers, refs):
    table = "bibrec_bib%sx" %
str(table)[:-1] contents = run_sql("select id_bibrec, id_bibxxx from %s" % table) papers = set(papers) refs = set(refs) # yes, there are duplicates and we must set them return set(ifilter(lambda x: x[0] in papers and x[1] in refs, contents)) def get_deleted_papers(): return run_sql("select o.id_bibrec from bibrec_bib98x o, " "(select i.id as iid from bib98x i " "where value = 'DELETED' " "and tag like '980__a') as dummy " "where o.id_bibxxx = dummy.iid") +def add_personID_external_id(personid, external_id_str, value): + run_sql("insert into aidPERSONIDDATA (personid,tag,data) values (%s,%s,%s)", + (personid, 'extid:%s' % external_id_str, value)) + +def remove_personID_external_id(personid, external_id_str, value=False): + if not value: + run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s", + (personid, 'extid:%s' % external_id_str)) + else: + run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s and data=%s", + (personid, 'extid:%s' % external_id_str, value)) + +def get_personiID_external_ids(personid): + ids = run_sql("select tag,data from aidPERSONIDDATA where personid=%s and tag like 'extid:%%'", + (personid,)) + + extids = {} + for i in ids: + id_str = i[0].split(':')[1] + idd = i[1] + try: + extids[id_str].append(idd) + except KeyError: + extids[id_str] = [idd] + return extids + #bibauthorid_maintenance personid update private methods def update_personID_canonical_names(persons_list=None, overwrite=False, suggested=''): ''' Updates the personID table creating or updating canonical names for persons @param: persons_list: persons to consider for the update (('1'),) @param: overwrite: if to touch already existing canonical names @param: suggested: string to suggest a canonical name for the person ''' - if not persons_list: - persons_list = [x[0] for x in run_sql('select distinct personid from aidPERSONIDPAPERS')] + if not persons_list and overwrite: + persons_list = set([x[0] for x in run_sql('select personid from aidPERSONIDPAPERS')]) + elif not persons_list: + persons_list = set([x[0] for x in run_sql('select personid from aidPERSONIDPAPERS')]) + existing_cnamed_pids = set( + [x[0] for x in run_sql('select personid from aidPERSONIDDATA where tag=%s', + ('canonical_name',))]) + persons_list = persons_list - existing_cnamed_pids for idx, pid in enumerate(persons_list): update_status(float(idx) / float(len(persons_list)), "Updating canonical_names...") current_canonical = run_sql("select data from aidPERSONIDDATA where " "personid=%s and tag=%s", (pid, 'canonical_name')) if overwrite or len(current_canonical) == 0: run_sql("delete from aidPERSONIDDATA where personid=%s and tag=%s", (pid, 'canonical_name')) names = get_person_names_count(pid) names = sorted(names, key=lambda k: k[1], reverse=True) if len(names) < 1 and not suggested: continue else: if suggested: canonical_name = suggested else: canonical_name = create_canonical_name(names[0][0]) existing_cnames = run_sql("select data from aidPERSONIDDATA " "where tag=%s and data like %s", ('canonical_name', str(canonical_name) + '%')) existing_cnames = set(name[0].lower() for name in existing_cnames) for i in count(1): cur_try = canonical_name + '.' 
+ str(i) if cur_try.lower() not in existing_cnames: canonical_name = cur_try break run_sql("insert into aidPERSONIDDATA (personid, tag, data) values (%s,%s,%s) ", (pid, 'canonical_name', canonical_name)) update_status_final("Updating canonical_names finished.") def personid_get_recids_affected_since(last_timestamp): ''' Returns a list of recids which have been manually changed since timestamp @TODO: extend the system to track and signal even automatic updates (unless a full reindex is acceptable in case of magic automatic update) @param: last_timestamp: last update, datetime.datetime ''' vset = set(int(v[0].split(',')[1]) for v in run_sql( "select distinct value from aidUSERINPUTLOG " "where timestamp > %s", (last_timestamp,)) if ',' in v[0] and ':' in v[0]) pids = set(int(p[0]) for p in run_sql( "select distinct personid from aidUSERINPUTLOG " "where timestamp > %s", (last_timestamp,)) if p[0] > 0) if pids: pids_s = list_2_SQL_str(pids) vset |= set(int(b[0]) for b in run_sql( "select bibrec from aidPERSONIDPAPERS " "where personid in %s" % pids_s)) return list(vset) # I'm not sure about this cast. It might work without it. def get_all_paper_records(pid, claimed_only=False): if not claimed_only: return run_sql("SELECT distinct bibrec FROM aidPERSONIDPAPERS WHERE personid = %s", (str(pid),)) else: return run_sql("SELECT distinct bibrec FROM aidPERSONIDPAPERS WHERE " "personid = %s and flag=2 or flag=-2", (str(pid),)) -def get_all_names_from_personid(): - return ((name[0][0], set(n[1] for n in name), len(name)) - for name in (run_sql( - "SELECT personid, name " - "FROM aidPERSONIDPAPERS " - "WHERE personid = %s " - "AND flag > -2", p) - for p in run_sql( - "SELECT DISTINCT personid " - "FROM aidPERSONIDPAPERS " - "WHERE flag > -2") - )) +def get_all_modified_names_from_personid(since=None): + if since: + all_pids = run_sql("SELECT DISTINCT personid " + "FROM aidPERSONIDPAPERS " + "WHERE flag > -2 " + "AND last_updated > %s" + % since) + else: + all_pids = run_sql("SELECT DISTINCT personid " + "FROM aidPERSONIDPAPERS " + "WHERE flag > -2 ") + return ((name[0][0], set(n[1] for n in name), len(name)) + for name in (run_sql( + "SELECT personid, name " + "FROM aidPERSONIDPAPERS " + "WHERE personid = %s " + "AND flag > -2", p) + for p in all_pids)) + + +def destroy_partial_marc_caches(): + global MARC_100_700_CACHE + MARC_100_700_CACHE = None + gc.collect() + +def populate_partial_marc_caches(): + global MARC_100_700_CACHE + + if MARC_100_700_CACHE: + return + + def br_dictionarize(maptable): + md = {} + for i in maptable: + try: + try: + md[i[0]]['id'][i[1]].append(i[2]) + except KeyError: + md[i[0]]['id'][i[1]] = [i[2]] + try: + md[i[0]]['fn'][i[2]].append(i[1]) + except KeyError: + md[i[0]]['fn'][i[2]] = [i[1]] + except KeyError: + md[i[0]] = {'id':{}, 'fn':{}} + md[i[0]]['id'][i[1]] = [i[2]] + md[i[0]]['fn'][i[2]] = [i[1]] + return md + + def bib_dictionarize(bibtable): + md = {} + for i in bibtable: + md[i[0]] = (i[1], i[2]) + return md + + + update_status(.0, 'Populating get_grouped_records_table_cache') + bibrec_bib10x = run_sql("select * from bibrec_bib10x") + update_status(.125, 'Populating get_grouped_records_table_cache') + brd_b10x = br_dictionarize(bibrec_bib10x) + del bibrec_bib10x + + update_status(.25, 'Populating get_grouped_records_table_cache') + bibrec_bib70x = run_sql("select * from bibrec_bib70x") + update_status(.375, 'Populating get_grouped_records_table_cache') + brd_b70x = br_dictionarize(bibrec_bib70x) + del bibrec_bib70x + + update_status(.5, 'Populating 
get_grouped_records_table_cache') + bib10x = run_sql("select * from bib10x") + update_status(.625, 'Populating get_grouped_records_table_cache') + bibd_10x = bib_dictionarize(bib10x) + del bib10x + + update_status(.75, 'Populating get_grouped_records_table_cache') + bib70x = run_sql("select * from bib70x") + update_status(.875, 'Populating get_grouped_records_table_cache') + bibd_70x = bib_dictionarize(bib70x) + del bib70x + + update_status_final('Finished populating get_grouped_records_table_cache') + MARC_100_700_CACHE = {'brb100':brd_b10x, 'brb700':brd_b70x, 'b100':bibd_10x, 'b700':bibd_70x} + +def _get_grouped_records_using_caches(brr, *args): + try: + c = MARC_100_700_CACHE['brb%s' % str(brr[0])][brr[2]] + fn = c['id'][brr[1]] + except KeyError: + return dict((arg, []) for arg in args) + if not fn or len(fn) > 1: + #if len fn > 1 it's BAD: the same signature is at least twice on the same paper. + #Let's default to nothing, to be on the safe side. + return dict((arg, []) for arg in args) + ids = set(chain(*(c['fn'][i] for i in fn))) + tuples = [MARC_100_700_CACHE['b%s' % str(brr[0])][i] for i in ids] + results = {} + for t in tuples: + if t[0] in args: + try: + results[t[0]].append(t[1]) + except KeyError: + results[t[0]] = [t[1]] + for arg in args: + if arg not in results.keys(): + results[arg] = [] + return results -def get_grouped_records(bibrefrec, *args): +def _get_grouped_records_from_db(bibrefrec, *args): ''' By a given bibrefrec: mark:ref,rec this function will scan bibmarkx table and extract all records with tag in argc, which are grouped togerther with this bibrec. Returns a dictionary with { tag : [extracted_values] } if the values is not found. - @type bibrefrec: (mark(int), ref(int), rec(int)) ''' table, ref, rec = bibrefrec target_table = "bib%sx" % (str(table)[:-1]) mapping_table = "bibrec_%s" % target_table group_id = run_sql("SELECT field_number " "FROM %s " "WHERE id_bibrec = %d " "AND id_bibxxx = %d" % (mapping_table, rec, ref)) if len(group_id) == 0: # unfortunately the mapping is not found, so # we cannot find anything return dict((arg, []) for arg in args) elif len(group_id) == 1: # All is fine field_number = group_id[0][0] else: # sounds bad, but ignore the error - field_number = group_id[0][0] + field_number = min(x[0] for x in group_id) grouped = run_sql("SELECT id_bibxxx " "FROM %s " "WHERE id_bibrec = %d " "AND field_number = %d" % (mapping_table, rec, int(field_number))) assert len(grouped) > 0 grouped_s = list_2_SQL_str(grouped, lambda x: str(x[0])) ret = {} for arg in args: qry = run_sql("SELECT value " "FROM %s " "WHERE tag LIKE '%s' " "AND id IN %s" % (target_table, arg, grouped_s)) ret[arg] = [q[0] for q in qry] return ret -def get_name_by_bibrecref(bib): +def get_grouped_records(bibrefrec, *args): + if MARC_100_700_CACHE: + if bconfig.DEBUG_CHECKS: + assert _get_grouped_records_using_caches(bibrefrec, *args) == _get_grouped_records_from_db(bibrefrec, *args) + return _get_grouped_records_using_caches(bibrefrec, *args) + else: + return _get_grouped_records_from_db(bibrefrec, *args) + +def get_person_with_extid(idd, match_tag=False): + if match_tag: + mtag = " and tag = '%s'" % 'extid:' + match_tag + else: + mtag = '' + pids = run_sql("select personid from aidPERSONIDDATA where data=%s" % '%s' + mtag, (idd,)) + return set(pids) + +def get_inspire_id(p): + ''' + Gets inspire id for a signature (bibref_table,bibref_value.bibrec) + ''' + return get_grouped_records((str(p[0]), p[1], p[2]), str(p[0]) + '__i').values()[0] + +def 
collect_personID_external_ids_from_papers(personid, limit_to_claimed_papers=False): + gathered_ids = {} + + if limit_to_claimed_papers: + flag = 1 + else: + flag = -2 + + person_papers = run_sql("select bibref_table,bibref_value,bibrec from aidPERSONIDPAPERS where " + "personid=%s and flag > %s", (personid, flag)) + + if COLLECT_INSPIRE_ID: + inspireids = [] + for p in person_papers: + extid = get_inspire_id(p) + if extid: + inspireids.append(extid) + inspireids = set((i[0] for i in inspireids)) + + gathered_ids['INSPIREID'] = inspireids + return gathered_ids + +def update_personID_external_ids(persons_list=None, overwrite=False, + limit_to_claimed_papers=False, force_cache_tables=False): + if force_cache_tables: + populate_partial_marc_caches() + + if not persons_list: + persons_list = set([x[0] for x in run_sql('select personid from aidPERSONIDPAPERS')]) + + for idx, pid in enumerate(persons_list): + update_status(float(idx) / float(len(persons_list)), "Updating external ids...") + + collected = collect_personID_external_ids_from_papers(pid, limit_to_claimed_papers=limit_to_claimed_papers) + present = get_personiID_external_ids(pid) + + if overwrite: + for idd in present.keys(): + for k in present[idd]: + remove_personID_external_id(pid, idd, value=k) + present = {} + + for idd in collected.keys(): + for k in collected[idd]: + if idd not in present or k not in present[idd]: + add_personID_external_id(pid, idd, k) + + if force_cache_tables: + destroy_partial_marc_caches() + + update_status_final("Updating external ids finished.") + +def _get_name_by_bibrecref_from_db(bib): ''' @param bib: bibrefrec or bibref @type bib: (mark, bibref, bibrec) OR (mark, bibref) ''' - table = "bib%sx" % (str(bib[0])[:-1]) + table = "bib%sx" % str(bib[0])[:-1] refid = bib[1] - tag = "%s__a" % bib[0] + tag = "%s__a" % str(bib[0]) ret = run_sql("select value from %s where id = '%s' and tag = '%s'" % (table, refid, tag)) # if zero - check if the garbage collector has run assert len(ret) == 1 return ret[0][0] +def _get_name_by_bibrecref_from_cache(bib): + ''' + @param bib: bibrefrec or bibref + @type bib: (mark, bibref, bibrec) OR (mark, bibref) + ''' + table = "b%s" % bib[0] + refid = bib[1] + tag = "__a" + ret = None + try: + if tag in MARC_100_700_CACHE[table][refid][0]: + ret = MARC_100_700_CACHE[table][refid][1] + except (KeyError, IndexError), e: + #The GC did run and the table is not clean? 
+ #We might want to allow empty response here + raise Exception(str(bib) + str(e)) + assert ret + return ret + +def get_name_by_bibrecref(bib): + if MARC_100_700_CACHE: + return _get_name_by_bibrecref_from_cache(bib) + else: + return _get_name_by_bibrecref_from_db(bib) def get_collaboration(bibrec): bibxxx = run_sql("select id_bibxxx from bibrec_bib71x where id_bibrec = %s", (str(bibrec),)) if len(bibxxx) == 0: return () bibxxx = list_2_SQL_str(bibxxx, lambda x: str(x[0])) ret = run_sql("select value from bib71x where id in %s and tag like '%s'" % (bibxxx, "710__g")) return [r[0] for r in ret] def get_key_words(bibrec): if bconfig.CFG_ADS_SITE: bibxxx = run_sql("select id_bibxxx from bibrec_bib65x where id_bibrec = %s", (str(bibrec),)) else: bibxxx = run_sql("select id_bibxxx from bibrec_bib69x where id_bibrec = %s", (str(bibrec),)) if len(bibxxx) == 0: return () bibxxx = list_2_SQL_str(bibxxx, lambda x: str(x[0])) if bconfig.CFG_ADS_SITE: ret = run_sql("select value from bib69x where id in %s and tag like '%s'" % (bibxxx, "6531_a")) else: ret = run_sql("select value from bib69x where id in %s and tag like '%s'" % (bibxxx, "695__a")) return [r[0] for r in ret] def get_all_authors(bibrec): bibxxx_1 = run_sql("select id_bibxxx from bibrec_bib10x where id_bibrec = %s", (str(bibrec),)) bibxxx_7 = run_sql("select id_bibxxx from bibrec_bib70x where id_bibrec = %s", (str(bibrec),)) if bibxxx_1: bibxxxs_1 = list_2_SQL_str(bibxxx_1, lambda x: str(x[0])) authors_1 = run_sql("select value from bib10x where tag = '%s' and id in %s" % ('100__a', bibxxxs_1,)) else: authors_1 = [] if bibxxx_7: bibxxxs_7 = list_2_SQL_str(bibxxx_7, lambda x: str(x[0])) authors_7 = run_sql("select value from bib70x where tag = '%s' and id in %s" % ('700__a', bibxxxs_7,)) else: authors_7 = [] return [a[0] for a in authors_1] + [a[0] for a in authors_7] def get_title_from_rec(rec): """ Returns the name of the paper like str if found. Otherwise returns None. """ title_id = run_sql("SELECT id_bibxxx " "FROM bibrec_bib24x " "WHERE id_bibrec = %s", (rec,)) if title_id: title_id_s = list_2_SQL_str(title_id, lambda x: x[0]) title = run_sql("SELECT value " "FROM bib24x " "WHERE id in %s " "AND tag = '245__a'" % title_id_s) if title: return title[0][0] def get_bib10x(): return run_sql("select id, value from bib10x where tag like %s", ("100__a",)) def get_bib70x(): return run_sql("select id, value from bib70x where tag like %s", ("700__a",)) -class bib_matrix: +class Bib_matrix(object): ''' This small class contains the sparse matrix and encapsulates it. 
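    A sketch of intended use (bib1 and bib2 stand for any two bibs present in
    the given cluster set; the values are illustrative):

        m = Bib_matrix(cluster_set)
        m[bib1, bib2] = (0.85, 0.3)          # (probability, certainty) for the pair
        assert m[bib2, bib1] == (0.85, 0.3)  # storage is symmetric
        m[bib1, bib2] = '+'                  # special claim markers are allowed too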
''' # please increment this value every time you # change the output of the comparison functions - current_comparison_version = 9 + current_comparison_version = 10 - special_items = ((None, -3., 'N'), ('+', -2., '+'), ('-', -1., '-')) - special_symbols = dict((x[0], (x[1], x[2])) for x in special_items) - special_numbers = dict((x[1], (x[0], x[2])) for x in special_items) - special_strings = dict((x[2], (x[0], x[1])) for x in special_items) + __special_items = ((None, -3.), ('+', -2.), ('-', -1.)) + special_symbols = dict((x[0], x[1]) for x in __special_items) + special_numbers = dict((x[1], x[0]) for x in __special_items) def __init__(self, cluster_set=None): if cluster_set: - bibs = chain(*(cl.bibs for cl in cluster_set.clusters)) - self._bibmap = dict((b[1], b[0]) for b in enumerate(bibs)) + self._bibmap = dict((b[1], b[0]) for b in enumerate(cluster_set.all_bibs())) width = len(self._bibmap) size = ((width - 1) * width) / 2 - self._matrix = bib_matrix.create_empty_matrix(size) + self._matrix = Bib_matrix.create_empty_matrix(size) else: self._bibmap = dict() + self.creation_time = get_sql_time() + @staticmethod def create_empty_matrix(lenght): ret = numpy.ndarray(shape=(lenght, 2), dtype=float, order='C') - ret.fill(bib_matrix.special_symbols[None][0]) + ret.fill(Bib_matrix.special_symbols[None]) return ret def _resolve_entry(self, bibs): entry = sorted(self._bibmap[bib] for bib in bibs) assert entry[0] < entry[1] return entry[0] + ((entry[1] - 1) * entry[1]) / 2 def __setitem__(self, bibs, val): entry = self._resolve_entry(bibs) - - if val in self.special_symbols: - num = self.special_symbols[val][0] - val = (num, num) - - self._matrix[entry] = val + self._matrix[entry] = Bib_matrix.special_symbols.get(val, val) def __getitem__(self, bibs): entry = self._resolve_entry(bibs) - ret = self._matrix[entry] - if ret[0] in self.special_numbers: - return self.special_numbers[ret[0]][0] - return ret[0], ret[1] + ret = tuple(self._matrix[entry]) + return Bib_matrix.special_numbers.get(ret[0], ret) def __contains__(self, bib): return bib in self._bibmap def get_keys(self): return self._bibmap.keys() @staticmethod - def __pickle_tuple(tupy): - ''' - tupy can be a very special iterable. It may contain: - * (float, float) - * None - * '+', '-' or '?' - ''' - def to_str(elem): - if elem[0] in bib_matrix.special_numbers: - return "%s" % bib_matrix.special_numbers[elem[0]][1] - return "%.2f:%.2f" % (elem[0], elem[1]) - - return "|".join(imap(to_str, tupy)) + def get_file_dir(name): + sub_dir = name[:2] + if not sub_dir: + sub_dir = "empty_last_name" + return "%s%s/" % (bconfig.TORTOISE_FILES_PATH, sub_dir) @staticmethod - def __unpickle_tuple(tupy): - ''' - tupy must be an object created by pickle_tuple. 
- ''' - def from_str(elem): - if elem in bib_matrix.special_strings: - nummy = bib_matrix.special_strings[elem][1] - return (nummy, nummy) - fls = elem.split(":") - assert len(fls) == 2 - return (float(fls[0]), float(fls[1])) - - strs = tupy.split("|") - if strs == ['']: - strs = [] - ret = bib_matrix.create_empty_matrix(len(strs)) - for i, stri in enumerate(strs): - if i % 100000 == 0: - update_status(float(i) / len(strs), "Loading the cache...") - ret[i][0], ret[i][1] = from_str(stri) - update_status_final("Probability matrix loaded.") - return ret + def get_map_path(dir_path, name): + return "%s%s.bibmap" % (dir_path, name) + + @staticmethod + def get_matrix_path(dir_path, name): + return "%s%s.npy" % (dir_path, name) + + def load(self, name, load_map=True, load_matrix=True): + files_dir = Bib_matrix.get_file_dir(name) - def load(self, name): - ''' - This method will load the matrix from the - database. - ''' - row = run_sql("select bibmap, matrix " - "from aidPROBCACHE " - "where cluster like %s", - (name,)) - if len(row) == 0: + if not os.path.isdir(files_dir): self._bibmap = dict() return False - elif len(row) == 1: - bibmap_vs = zlib.decompress(row[0][0]) - bibmap_v = cPickle.loads(bibmap_vs) - rec_v, self.creation_time, self._bibmap = bibmap_v - if (rec_v != bib_matrix.current_comparison_version or - bib_matrix.current_comparison_version < 0): # you can use negative - # version to recalculate - self._bibmap = dict() - return False - matrix_s = zlib.decompress(row[0][1]) - self._matrix = bib_matrix.__unpickle_tuple(matrix_s) - if self._bibmap and self._matrix != None: - if len(self._bibmap) * (len(self._bibmap) - 1) / 2 != len(self._matrix): - print >> sys.stderr, ("Error: aidPROBCACHE is corrupted! " - "Cluster %s has bibmap with %d bibs, " - "but matrix with %d entries." - % (name, len(self._bibmap), len(self._matrix))) - print >> sys.stderr, "Try to increase max_packet_size." 
- assert False, "Bibmap: %d, Matrix %d" % (len(self._bibmap), len(self._matrix)) + try: + if load_map: + bibmap_v = cPickle.load(open(Bib_matrix.get_map_path(files_dir, name), 'r')) + rec_v, self.creation_time, self._bibmap = bibmap_v + if (rec_v != Bib_matrix.current_comparison_version or + Bib_matrix.current_comparison_version < 0): # you can use negative + # version to recalculate + self._bibmap = dict() return False - return True - else: + + if load_matrix: + self._matrix = numpy.load(Bib_matrix.get_matrix_path(files_dir, name)) + except (IOError, UnpicklingError): + if load_map: self._bibmap = dict() + self.creation_time = get_sql_time() return False - else: - assert False, "aidPROBCACHE is corrupted" - self._bibmap = dict() - return False + return True - def store(self, name, creation_time): - bibmap_v = (bib_matrix.current_comparison_version, creation_time, self._bibmap) - bibmap_vs = cPickle.dumps(bibmap_v) - bibmap_vsc = zlib.compress(bibmap_vs) + def store(self, name): + files_dir = Bib_matrix.get_file_dir(name) - matrix_s = bib_matrix.__pickle_tuple(self._matrix) - matrix_sc = zlib.compress(matrix_s) + if not os.path.isdir(files_dir): + try: + os.mkdir(files_dir) + except OSError, e: + if e.errno == 17 or 'file exists' in str(e.strerror).lower(): + pass + else: + raise e - run_sql("delete from aidPROBCACHE where cluster like %s", (name,)) - run_sql("insert low_priority " - "into aidPROBCACHE " - "set cluster = %s, " - "bibmap = %s, " - "matrix = %s", - (name, bibmap_vsc, matrix_sc)) + bibmap_v = (Bib_matrix.current_comparison_version, self.creation_time, self._bibmap) + cPickle.dump(bibmap_v, open(Bib_matrix.get_map_path(files_dir, name), 'w')) + numpy.save(open(Bib_matrix.get_matrix_path(files_dir, name), "w"), self._matrix) def delete_paper_from_personid(rec): ''' Deletes all information in PERSONID about a given paper ''' run_sql("delete from aidPERSONIDPAPERS where bibrec = %s", (rec,)) def get_signatures_from_rec(bibrec): ''' Retrieves all information in PERSONID about a given bibrec. ''' return run_sql("select personid, bibref_table, bibref_value, bibrec, name " "from aidPERSONIDPAPERS where bibrec = %s" , (bibrec,)) def modify_signature(oldref, oldrec, newref, newname): ''' Modifies a signature in aidPERSONIDpapers. ''' return run_sql("UPDATE aidPERSONIDPAPERS " "SET bibref_table = %s, bibref_value = %s, name = %s " "WHERE bibref_table = %s AND bibref_value = %s AND bibrec = %s" , (str(newref[0]), newref[1], newname, str(oldref[0]), oldref[1], oldrec)) def find_pids_by_name(name): ''' Finds names and personids by a prefix name. ''' return set(run_sql("SELECT personid, name " "FROM aidPERSONIDPAPERS " "WHERE name like %s" , (name + ',%',))) def find_pids_by_exact_name(name): """ Finds names and personids by a name. """ return set(run_sql("SELECT personid " "FROM aidPERSONIDPAPERS " "WHERE name = %s" , (name,))) def remove_sigs(signatures): ''' Removes records from aidPERSONIDPAPERS ''' for sig in signatures: run_sql("DELETE FROM aidPERSONIDPAPERS " "WHERE bibref_table like %s AND bibref_value = %s AND bibrec = %s" , (str(sig[0]), sig[1], sig[2])) def remove_personid_papers(pids): ''' Removes all signatures from aidPERSONIDPAPERS with pid in pids ''' if pids: run_sql("delete from aidPERSONIDPAPERS where personid in %s" % list_2_SQL_str(pids)) def get_full_personid_papers(table_name="`aidPERSONIDPAPERS`"): ''' Get all columns and rows from aidPERSONIDPAPERS or any other table with the same structure. 
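Note that table_name is interpolated directly into the SQL string (run_sql receives no bind parameters here), so it must only ever come from trusted code, never from user input.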
''' return run_sql("select personid, bibref_table, " "bibref_value, bibrec, name, flag, " "lcul from %s" % table_name) def get_full_results(): ''' Deprecated. Should be removed soon. ''' return run_sql("select personid, bibref_table, bibref_value, bibrec " "from aidRESULTS") def get_lastname_results(last_name): ''' Returns rows from aidRESULTS which share a common last name. ''' return run_sql("select personid, bibref_table, bibref_value, bibrec " "from aidRESULTS " "where personid like '" + last_name + ".%'") def get_full_personid_data(table_name="`aidPERSONIDDATA`"): ''' Get all columns and rows from aidPERSONIDDATA or any other table with the same structure. ''' return run_sql("select personid, tag, data, " "opt1, opt2, opt3 from %s" % table_name) +def get_name_string_to_pid_dictionary(): + namesdict = {} + all_names = run_sql("select personid,name from aidPERSONIDPAPERS") + for x in all_names: + try: + namesdict[x[1]].add(x[0]) + except KeyError: + namesdict[x[1]] = set([x[0]]) + return namesdict def get_wrong_names(): ''' Returns a generator with all wrong names in aidPERSONIDPAPERS. Every element is (table, ref, correct_name). ''' bib100 = dict(((x[0], create_normalized_name(split_name_parts(x[1]))) for x in get_bib10x())) bib700 = dict(((x[0], create_normalized_name(split_name_parts(x[1]))) for x in get_bib70x())) - pidnames100 = run_sql("select distinct bibref_value, name from aidPERSONIDPAPERS " - " where bibref_table='100'") - pidnames700 = run_sql("select distinct bibref_value, name from aidPERSONIDPAPERS " - " where bibref_table='700'") + pidnames100 = set(run_sql("select bibref_value, name from aidPERSONIDPAPERS " + " where bibref_table='100'")) + pidnames700 = set(run_sql("select bibref_value, name from aidPERSONIDPAPERS " + " where bibref_table='700'")) wrong100 = set(('100', x[0], bib100.get(x[0], None)) for x in pidnames100 if x[1] != bib100.get(x[0], None)) wrong700 = set(('700', x[0], bib700.get(x[0], None)) for x in pidnames700 if x[1] != bib700.get(x[0], None)) total = len(wrong100) + len(wrong700) return chain(wrong100, wrong700), total def check_personid_papers(output_file=None): ''' Checks all invariants of personid. Writes to the given output_file, or to stdout if none is given. ''' if output_file: fp = open(output_file, "w") printer = lambda x: fp.write(x + '\n') else: printer = bibauthor_print checkers = (check_duplicated_papers, check_duplicated_signatures, check_wrong_names, check_canonical_names, - check_empty_personids, + # check_empty_personids, check_wrong_rejection, # check_claim_inspireid_contradiction, ) # Avoid writing f(a) or g(a), because one of the calls # might be optimized.
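# For example (illustrative), all(check(printer) for check in checkers) would short-circuit on the first failing checker and silently skip the remaining reports; the list below forces every checker to run.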
return all([check(printer) for check in checkers]) def check_duplicated_papers(printer): ret = True - pids = run_sql("select distinct personid from aidPERSONIDPAPERS") + pids = set(run_sql("select personid from aidPERSONIDPAPERS")) for pid in pids: pid = pid[0] recs = run_sql("select bibrec from aidPERSONIDPAPERS where personid = %s and flag <> %s", (pid, -2)) recs = [rec[0] for rec in recs] for rec in set(recs): recs.remove(rec) if recs: ret = False printer("Person %d has duplicated papers: %s" % (pid, str(tuple(set(recs))))) return ret def check_duplicated_signatures(printer): ret = True - recs = run_sql("select distinct bibrec from aidPERSONIDPAPERS") + recs = set(run_sql("select bibrec from aidPERSONIDPAPERS")) for rec in recs: rec = rec[0] refs = list(run_sql("select bibref_table, bibref_value from aidPERSONIDPAPERS where bibrec = %s and flag > %s", (rec, "-2"))) for ref in set(refs): refs.remove(ref) if refs: ret = False refs = sorted(refs) refs = groupby(refs) refs = ["Found %s:%s %d times." % (key[0], key[1], len(list(data)) + 1) for key, data in refs] printer("Paper %d has duplicated signatures:" % rec) for ref in refs: printer("\t%s" % ref) return ret def check_wrong_names(printer): ret = True wrong_names, number = get_wrong_names() if number > 0: ret = False printer("%d corrupted names in aidPERSONIDPAPERS." % number) for wrong_name in wrong_names: if wrong_name[2]: printer("Outdated name, '%s'(%s:%d)." % (wrong_name[2], wrong_name[0], wrong_name[1])) else: printer("Invalid id(%s:%d)." % (wrong_name[0], wrong_name[1])) return ret def check_canonical_names(printer): ret = True pid_cn = run_sql("select personid, data from aidPERSONIDDATA where tag = %s", ('canonical_name',)) pid_2_cn = dict((k, len(list(d))) for k, d in groupby(sorted(pid_cn, key=itemgetter(0)), key=itemgetter(0))) for pid in get_existing_personids(): canon = pid_2_cn.get(pid, 0) if canon != 1: if canon == 0: papers = run_sql("select count(*) from aidPERSONIDPAPERS where personid = %s", (pid,))[0][0] if papers != 0: printer("Personid %d does not have a canonical name, but has %d papers." % (pid, papers)) ret = False else: printer("Personid %d has %d canonical names." % (pid, canon)) ret = False return ret def check_empty_personids(printer): ret = True paper_pids = set(p[0] for p in run_sql("select personid from aidPERSONIDPAPERS")) data_pids = set(p[0] for p in run_sql("select personid from aidPERSONIDDATA")) for p in data_pids - paper_pids: fields = run_sql("select count(*) from aidPERSONIDDATA where personid = %s and tag <> %s", (p, "canonical_name",))[0][0] if fields == 0: printer("Personid %d has no papers and nothing other than a canonical_name." % p) ret = False return ret def check_wrong_rejection(printer): ret = True all_rejections = run_sql("select personid, bibref_table, bibref_value, bibrec " "from aidPERSONIDPAPERS " "where flag = %s", ('-2',)) for rej in all_rejections: sigs = run_sql("select personid from aidPERSONIDPAPERS " "where bibref_table = %s " "and bibref_value = %s " "and bibrec = %s " "and flag <> '-2'", rej[1:]) # To avoid duplication of error messages don't complain # if the paper is assigned to more than one personid. if not sigs: printer("The paper (%s:%s,%s) was rejected from person %d, but never assigned or claimed." % (rej[1:] + rej[:1])) ret = False elif (rej[0],) in sigs: printer("Personid %d has both assigned and rejected paper (%s:%s,%s)." % rej) ret = False return ret def check_merger(): ''' This function presumes that copy_personid was called before the merger.
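    A minimal sketch of the check performed below (values illustrative only):

        old_claims = set([(742, '100', 55, 1337, 2)])
        cur_claims = set()
        lost = old_claims - cur_claims   # non-empty: claims were lost in the merge
        new = cur_claims - old_claims    # non-empty: new claims appeared after it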
''' is_ok = True old_claims = set(run_sql("select personid, bibref_table, bibref_value, bibrec, flag " "from aidPERSONIDPAPERS_copy " "where flag = -2 or flag = 2")) cur_claims = set(run_sql("select personid, bibref_table, bibref_value, bibrec, flag " "from aidPERSONIDPAPERS " "where flag = -2 or flag = 2")) errors = ((old_claims - cur_claims, "Some claims were lost during the merge."), (cur_claims - old_claims, "Some new claims appeared after the merge.")) act = { -2 : 'Rejection', 2 : 'Claim' } for err_set, err_msg in errors: if err_set: is_ok = False bibauthor_print(err_msg) bibauthor_print("".join(" %s: personid %d %d:%d,%d\n" % - (act[cl[6]], cl[0], int(cl[1]), cl[2], cl[3]) for cl in err_set)) + (act[cl[4]], cl[0], int(cl[1]), cl[2], cl[3]) for cl in err_set)) old_assigned = set(run_sql("select bibref_table, bibref_value, bibrec " "from aidPERSONIDPAPERS_copy")) - #"where flag <> -2 and flag <> 2")) + #"where flag <> -2 and flag <> 2")) cur_assigned = set(run_sql("select bibref_table, bibref_value, bibrec " "from aidPERSONIDPAPERS")) - #"where flag <> -2 and flag <> 2")) + #"where flag <> -2 and flag <> 2")) errors = ((old_assigned - cur_assigned, "Some signatures were lost during the merge."), (cur_assigned - old_assigned, "Some new signatures appeared after the merge.")) for err_sig, err_msg in errors: if err_sig: is_ok = False bibauthor_print(err_msg) bibauthor_print("".join(" %s:%d,%d\n" % sig for sig in err_sig)) return is_ok def check_results(): is_ok = True all_result_rows = run_sql("select * from aidRESULTS") keyfunc = lambda x: x[1:] duplicated = (d for d in (list(d) for k, d in groupby(sorted(all_result_rows, key=keyfunc), key=keyfunc)) if len(d) > 1) for dd in duplicated: is_ok = False for d in dd: + print "Duplicated row in aidRESULTS" print "%s %s %s %s" % d print clusters = {} for rr in all_result_rows: clusters[rr[0]] = clusters.get(rr[0], []) + [rr[3]] faulty_clusters = dict((cid, len(recs) - len(set(recs))) for cid, recs in clusters.items() if not len(recs) == len(set(recs))) if faulty_clusters: is_ok = False print "Recids NOT unique in clusters!" 
print ("A total of %s clusters hold an average of %.2f duplicates" % (len(faulty_clusters), (sum(faulty_clusters.values()) / float(len(faulty_clusters))))) for c in faulty_clusters: print "Name: %-20s Size: %4d Faulty: %2d" % (c, len(clusters[c]), faulty_clusters[c]) return is_ok def check_claim_inspireid_contradiction(): iids10x = run_sql("select id from bib10x where tag = '100__i'") iids70x = run_sql("select id from bib70x where tag = '700__i'") refs10x = set(x[0] for x in run_sql("select id from bib10x where tag = '100__a'")) refs70x = set(x[0] for x in run_sql("select id from bib70x where tag = '700__a'")) if iids10x: iids10x = list_2_SQL_str(iids10x, lambda x: str(x[0])) iids10x = run_sql("select id_bibxxx, id_bibrec, field_number " "from bibrec_bib10x " "where id_bibxxx in %s" % iids10x) iids10x = ((row[0], [(ref, rec) for ref, rec in run_sql( "select id_bibxxx, id_bibrec " "from bibrec_bib10x " "where id_bibrec = '%s' " "and field_number = '%s'" % row[1:]) if ref in refs10x]) for row in iids10x) else: iids10x = () if iids70x: iids70x = list_2_SQL_str(iids70x, lambda x: str(x[0])) iids70x = run_sql("select id_bibxxx, id_bibrec, field_number " "from bibrec_bib70x " "where id_bibxxx in %s" % iids70x) iids70x = ((row[0], [(ref, rec) for ref, rec in run_sql( "select id_bibxxx, id_bibrec " "from bibrec_bib70x " "where id_bibrec = '%s' " "and field_number = '%s'" % (row[1:])) if ref in refs70x]) for row in iids70x) else: iids70x = () # [(iids, [bibs])] - inspired = list(chain(((iid, list(set(('100', ) + bib for bib in bibs))) for iid, bibs in iids10x), - ((iid, list(set(('700', ) + bib for bib in bibs))) for iid, bibs in iids70x))) + inspired = list(chain(((iid, list(set(('100',) + bib for bib in bibs))) for iid, bibs in iids10x), + ((iid, list(set(('700',) + bib for bib in bibs))) for iid, bibs in iids70x))) assert all(len(x[1]) == 1 for x in inspired) inspired = ((k, map(itemgetter(0), map(itemgetter(1), d))) for k, d in groupby(sorted(inspired, key=itemgetter(0)), key=itemgetter(0))) # [(inspireid, [bibs])] inspired = [([(run_sql("select personid " "from aidPERSONIDPAPERS " "where bibref_table = %s " "and bibref_value = %s " "and bibrec = %s " "and flag = '2'" , bib), bib) for bib in cluster[1]], cluster[0]) for cluster in inspired] # [([([pid], bibs)], inspireid)] for cluster, iid in inspired: pids = set(chain.from_iterable(imap(itemgetter(0), cluster))) if len(pids) > 1: print "InspireID: %s links the following papers:" % iid print map(itemgetter(1), cluster) print "More than one personid claimed them:" print list(pids) print continue if len(pids) == 0: # not even one paper with this inspireid has been # claimed, screw it continue pid = list(pids)[0][0] # The last step is to check all non-claimed papers for being # claimed by the person on some different signature. problem = (run_sql("select bibref_table, bibref_value, bibrec " "from aidPERSONIDPAPERS " "where bibrec = %s " "and personid = %s " "and flag = %s" , (bib[2], pid, 2)) for bib in (bib for lpid, bib in cluster if not lpid)) problem = list(chain.from_iterable(problem)) if problem: print "A personid has claimed a paper from an inspireid cluster and a contradictory paper." print "Personid %d" % pid print "Inspireid cluster %s" % str(map(itemgetter(1), cluster)) print "Contradicting claims: %s" % str(problem) print def repair_personid(): ''' This should make check_personid_papers() to return true. 
''' - pids = run_sql("select distinct personid from aidPERSONIDPAPERS") + pids = set(run_sql("select personid from aidPERSONIDPAPERS")) lpids = len(pids) for i, pid in enumerate((p[0] for p in pids)): update_status(float(i) / lpids, "Checking per-pid...") rows = run_sql("select bibrec, bibref_table, bibref_value, flag " "from aidPERSONIDPAPERS where personid = %s", (pid,)) rows = ((k, list(d)) for k, d in groupby(sorted(rows, key=itemgetter(0)), itemgetter(0))) for rec, sigs in rows: if len(sigs) > 1: claimed = [sig for sig in sigs if sig[3] > 1] rejected = [sig for sig in sigs if sig[3] < -1] if len(claimed) == 1: sigs.remove(claimed[0]) elif len(claimed) == 0 and len(rejected) == 1: sigs.remove(rejected[0]) for sig in set(sigs): run_sql("delete from aidPERSONIDPAPERS " "where personid = %s " "and bibrec = %s " "and bibref_table = %s " "and bibref_value = %s " "and flag = %s" , (pid, sig[0], sig[1], sig[2], sig[3])) update_status_final("Done with per-pid fixing.") recs = run_sql("select distinct bibrec from aidPERSONIDPAPERS") lrecs = len(recs) for i, rec in enumerate((r[0] for r in recs)): update_status(float(i) / lrecs, "Checking per-rec...") rows = run_sql("select bibref_table, bibref_value, flag from aidPERSONIDPAPERS " "where bibrec = %s", (rec,)) kfuc = itemgetter(slice(0, 2)) rows = ((k, map(itemgetter(2), d)) for k, d in groupby(sorted(rows), kfuc)) for bibref, flags in rows: if len(flags) > 1: claimed = sum(1 for f in flags if f > 1) rejected = sum(1 for f in flags if f < -1) if claimed == 1: run_sql("delete from aidPERSONIDPAPERS " "where bibrec = %s " "and bibref_table = %s " "and bibref_value = %s " "and flag <> %s" , (rec, bibref[0], bibref[1], 2)) elif claimed == 0 and rejected == 1: run_sql("delete from aidPERSONIDPAPERS " "where bibrec = %s " "and bibref_table = %s " "and bibref_value = %s " "and flag <> %s" , (rec, bibref[0], bibref[1], -2)) else: run_sql("delete from aidPERSONIDPAPERS " "where bibrec = %s " "and bibref_table = %s " "and bibref_value = %s" , (rec, bibref[0], bibref[1])) update_status_final("Done with per-rec fixing.") update_status(0 / 1, "Fixing wrong names...") wrong_names, number = get_wrong_names() for i, w in enumerate(wrong_names): update_status(i / number, "Fixing wrong names...") if w[2]: run_sql("update aidPERSONIDPAPERS set name=%s where bibref_table=%s and bibref_value=%s", (w[2], w[0], w[1])) else: run_sql("delete from aidPERSONIDPAPERS where bibref_table=%s and bibref_value=%s", (w[0], w[1])) no_rejs = frozenset(run_sql("select bibref_table, bibref_value, bibrec from aidPERSONIDPAPERS where flag <> -2")) rejs = frozenset(run_sql("select bibref_table, bibref_value, bibrec from aidPERSONIDPAPERS where flag = -2")) floating_rejs = rejs - no_rejs update_personID_canonical_names(map(new_person_from_signature, floating_rejs)) update_status_final("Fixed all wrong names.") update_status(0, "Checking missing canonical names...") paper_pids = run_sql("select distinct personid from aidPERSONIDPAPERS") cname_pids = run_sql("select distinct personid from aidPERSONIDDATA where tag='canonical_name'") missing_cnames = list(set(p[0] for p in paper_pids) - set(p[0] for p in cname_pids)) npids = len(missing_cnames) for pid in missing_cnames: update_status(missing_cnames.index(pid) / float(npids), "Creating missing canonical names...") update_personID_canonical_names([pid]) update_status_final("Done restoring canonical names.") def get_all_bibrecs(): return [x[0] for x in run_sql("select distinct bibrec from aidPERSONIDPAPERS")] +def 
get_bibrefrec_to_pid_flag_mapping(): + whole_table = run_sql("select bibref_table,bibref_value,bibrec,personid,flag from aidPERSONIDPAPERS") + ret = {} + for x in whole_table: + sig = (x[0], x[1], x[2]) + pid_flag = (x[3], x[4]) + ret[sig] = ret.get(sig , []) + [pid_flag] + return ret def remove_all_bibrecs(bibrecs): bibrecs_s = list_2_SQL_str(bibrecs) run_sql("delete from aidPERSONIDPAPERS where bibrec in %s" % bibrecs_s) def empty_results_table(): run_sql("TRUNCATE aidRESULTS") def save_cluster(named_cluster): name, cluster = named_cluster for bib in cluster.bibs: run_sql("INSERT INTO aidRESULTS " "(personid, bibref_table, bibref_value, bibrec) " "VALUES (%s, %s, %s, %s) " , (name, str(bib[0]), bib[1], bib[2])) def remove_result_cluster(name): run_sql("DELETE FROM aidRESULTS " - "WHERE personid like '%s%%'" + "WHERE personid like '%s.%%'" % name) def personid_name_from_signature(sig): ret = run_sql("select personid, name " "from aidPERSONIDPAPERS " "where bibref_table = %s and bibref_value = %s and bibrec = %s " "and flag > '-2'" , sig) assert len(ret) < 2, ret return ret def personid_from_signature(sig): ret = run_sql("select personid, flag " "from aidPERSONIDPAPERS " "where bibref_table = %s and bibref_value = %s and bibrec = %s " "and flag > '-2'" , sig) assert len(ret) < 2, ret return ret -def in_results(name): - return run_sql("select count(*) " - "from aidRESULTS " - "where personid like %s" - , (name + '.0',))[0][0] > 0 - def get_signature_info(sig): ret = run_sql("select personid, flag " "from aidPERSONIDPAPERS " "where bibref_table = %s and bibref_value = %s and bibrec = %s " "order by flag" , sig) return ret def get_claimed_papers(pid): return run_sql("select bibref_table, bibref_value, bibrec " "from aidPERSONIDPAPERS " "where personid = %s " "and flag > %s", (pid, 1)) def copy_personids(): run_sql("DROP TABLE IF EXISTS `aidPERSONIDDATA_copy`") run_sql("CREATE TABLE `aidPERSONIDDATA_copy` ( " - "`personid` BIGINT( 16 ) UNSIGNED NOT NULL , " + "`personid` BIGINT( 8 ) UNSIGNED NOT NULL , " "`tag` VARCHAR( 64 ) NOT NULL , " "`data` VARCHAR( 256 ) NOT NULL , " "`opt1` MEDIUMINT( 8 ) DEFAULT NULL , " "`opt2` MEDIUMINT( 8 ) DEFAULT NULL , " "`opt3` VARCHAR( 256 ) DEFAULT NULL , " "KEY `personid-b` ( `personid` ) , " "KEY `tag-b` ( `tag` ) , " "KEY `data-b` ( `data` ) , " "KEY `opt1` ( `opt1` ) " ") ENGINE = MYISAM DEFAULT CHARSET = utf8") run_sql("INSERT INTO `aidPERSONIDDATA_copy` " "SELECT * " "FROM `aidPERSONIDDATA`") run_sql("DROP TABLE IF EXISTS `aidPERSONIDPAPERS_copy`") run_sql("CREATE TABLE `aidPERSONIDPAPERS_copy` ( " - "`personid` bigint( 16 ) unsigned NOT NULL , " + "`personid` bigint( 8 ) unsigned NOT NULL , " "`bibref_table` enum( '100', '700' ) NOT NULL , " "`bibref_value` mediumint( 8 ) unsigned NOT NULL , " "`bibrec` mediumint( 8 ) unsigned NOT NULL , " "`name` varchar( 256 ) NOT NULL , " "`flag` smallint( 2 ) NOT NULL DEFAULT '0', " "`lcul` smallint( 2 ) NOT NULL DEFAULT '0', " "`last_updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP , " "KEY `personid-b` ( `personid` ) , " "KEY `reftable-b` ( `bibref_table` ) , " "KEY `refvalue-b` ( `bibref_value` ) , " "KEY `rec-b` ( `bibrec` ) , " "KEY `name-b` ( `name` ) , " "KEY `timestamp-b` ( `last_updated` ) , " "KEY `ptvrf-b` ( `personid` , `bibref_table` , `bibref_value` , `bibrec` , `flag` ) " ") ENGINE = MyISAM DEFAULT CHARSET = utf8") run_sql("INSERT INTO `aidPERSONIDPAPERS_copy` " "SELECT * " "FROM `aidPERSONIDPAPERS") def delete_empty_persons(): pp = run_sql("select personid from 
aidPERSONIDPAPERS") pp = set(p[0] for p in pp) pd = run_sql("select personid from aidPERSONIDDATA") pd = set(p[0] for p in pd) fpd = run_sql("select personid from aidPERSONIDDATA where tag <> 'canonical_name'") fpd = set(p[0] for p in fpd) to_delete = pd - (pp | fpd) if to_delete: run_sql("delete from aidPERSONIDDATA where personid in %s" % list_2_SQL_str(to_delete)) + def restore_personids(): run_sql("TRUNCATE `aidPERSONIDDATA`") run_sql("INSERT INTO `aidPERSONIDDATA` " "SELECT * " "FROM `aidPERSONIDDATA_copy`") run_sql("TRUNCATE `aidPERSONIDPAPERS`") run_sql("INSERT INTO `aidPERSONIDPAPERS` " "SELECT * " "FROM `aidPERSONIDPAPERS_copy") -def get_possible_personids_from_paperlist_old(bibrecreflist): - ''' - @param bibrecreflist: list of bibrecref couples, (('100:123,123',),) or bibrecs (('123',),) - returns a list of pids and connected bibrefs in order of number of bibrefs per pid - [ [['1'],['123:123.123','123:123.123']] , [['2'],['123:123.123']] ] - ''' - - pid_bibrecref_dict = {} - for b in bibrecreflist: - pids = [] - - try: - pids = run_sql("select personid from aidPERSONID " - "use index (`tdf-b`) where tag=%s and data=%s", ('paper', str(b[0]))) - except (OperationalError, ProgrammingError): - pids = run_sql("select personid from aidPERSONID " - "where tag=%s and data=%s", ('paper', str(b[0]))) - - for pid in pids: - if pid[0] in pid_bibrecref_dict: - pid_bibrecref_dict[pid[0]].append(str(b[0])) - else: - pid_bibrecref_dict[pid[0]] = [str(b[0])] - - pid_list = [[i, pid_bibrecref_dict[i]] for i in pid_bibrecref_dict] - - return sorted(pid_list, key=lambda k: len(k[2]), reverse=True) - - def resolve_affiliation(ambiguous_aff_string): """ This is a method available in the context of author disambiguation in ADS only. No other platform provides the db table used by this function. @warning: to be used in an ADS context only. @param ambiguous_aff_string: Ambiguous affiliation string @type ambiguous_aff_string: str @return: The normalized version of the name string as presented in the database @rtype: str """ if not ambiguous_aff_string or not bconfig.CFG_ADS_SITE: return "None" aff_id = run_sql("select aff_id from ads_affiliations where affstring=%s", (ambiguous_aff_string,)) if aff_id: return aff_id[0][0] else: return "None" + + +def get_free_pids(): + ''' + Returns an iterator with all free personids. + It's cool, because it fills holes. 
+ ''' + all_pids = frozenset(x[0] for x in chain( + run_sql("select personid from aidPERSONIDPAPERS") , + run_sql("select personid from aidPERSONIDDATA"))) + return ifilter(lambda x: x not in all_pids, count()) + + +def remove_results_outside(many_names): + many_names = frozenset(many_names) + res_names = frozenset(x[0].split(".")[0] for x in run_sql("select personid from aidRESULTS")) + + for name in res_names - many_names: + run_sql("delete from aidRESULTS where personid like '%s.%%'" % name) + + +def get_signatures_from_bibrefs(bibrefs): + bib10x = ifilter(lambda x: x[0] == 100, bibrefs) + bib10x_s = list_2_SQL_str(bib10x, lambda x: x[1]) + bib70x = ifilter(lambda x: x[0] == 700, bibrefs) + bib70x_s = list_2_SQL_str(bib70x, lambda x: x[1]) + valid_recs = set(get_all_valid_bibrecs()) + + if bib10x_s != '()': + sig10x = run_sql("select 100, id_bibxxx, id_bibrec " + "from bibrec_bib10x " + "where id_bibxxx in %s" + % (bib10x_s,)) + else: + sig10x = () + + if bib70x_s != '()': + sig70x = run_sql("select 700, id_bibxxx, id_bibrec " + "from bibrec_bib70x " + "where id_bibxxx in %s" + % (bib70x_s,)) + else: + sig70x = () + + # signature rows are (bibref_table, bibref_value, bibrec); keep only valid bibrecs + return ifilter(lambda x: x[2] in valid_recs, chain(sig10x, sig70x)) + + +def get_all_valid_bibrecs(): + collection_restriction_pattern = " or ".join(["980__a:\"%s\"" % x for x in bconfig.LIMIT_TO_COLLECTIONS]) + return perform_request_search(p="%s" % collection_restriction_pattern, rg=0) diff --git a/modules/bibauthorid/lib/bibauthorid_general_utils.py b/modules/bibauthorid/lib/bibauthorid_general_utils.py index 51e78a864..762cffd59 100644 --- a/modules/bibauthorid/lib/bibauthorid_general_utils.py +++ b/modules/bibauthorid/lib/bibauthorid_general_utils.py @@ -1,84 +1,106 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
''' bibauthorid_general_utils Bibauthorid utilities used by many parts of the framework ''' import bibauthorid_config as bconfig - +from datetime import datetime +PRINT_TS = bconfig.DEBUG_TIMESTAMPS +PRINT_TS_US = bconfig.DEBUG_TIMESTAMPS_UPDATE_STATUS and PRINT_TS def __print_func(*args): + if PRINT_TS: + print datetime.now(), for arg in args: print arg, print "" def __dummy_print(*args): pass def __create_conditional_print(cond): if cond: return __print_func else: return __dummy_print bibauthor_print = __create_conditional_print(bconfig.DEBUG_OUTPUT) name_comparison_print = __create_conditional_print(bconfig.DEBUG_NAME_COMPARISON_OUTPUT) metadata_comparison_print = __create_conditional_print(bconfig.DEBUG_METADATA_COMPARISON_OUTPUT) wedge_print = __create_conditional_print(bconfig.DEBUG_WEDGE_OUTPUT) + if bconfig.DEBUG_OUTPUT: import sys - status_len = 65 + status_len = 68 comment_len = 40 def padd(stry, l): return stry[:l].ljust(l) - def update_status(percent, comment=""): - percent = int(percent * 100) - progress = padd("[%s%s] %d%% done" % ("#" * (percent / 2), "-" * (50 - percent / 2), percent), status_len) + def update_status(percent, comment="", print_ts=False): + filled = int(percent * 50) + bar = "[%s%s] " % ("#" * filled, "-" * (50 - filled)) + percent = ("%.2f%% done" % (percent * 100)).rjust(12) + progress = padd(bar + percent, status_len) comment = padd(comment, comment_len) + if print_ts or PRINT_TS_US: + print datetime.now(), print progress, comment, '\r', def update_status_final(comment=""): - update_status(1., comment) + update_status(1., comment, print_ts=PRINT_TS) print "" sys.stdout.flush() else: def update_status(percent, comment=""): pass def update_status_final(comment=""): pass -mem_file = '/tmp/tortoise_memory.log' - -def print_tortoise_memory_log(summary): - fp = open(mem_file, 'a') - stry = "PID:\t%s\tPEAK:\t%s\tEST:\t%s\tBIBS:\t%s\n" % (summary['pid'], summary['peak'], summary['est'], summary['bibs']) +def print_tortoise_memory_log(summary, fp): + stry = "PID:\t%s\tPEAK:\t%s,%s\tEST:\t%s\tBIBS:\t%s\n" % (summary['pid'], summary['peak1'], summary['peak2'], summary['est'], summary['bibs']) fp.write(stry) - fp.close() -def clear_tortoise_memory_log(): - fp = open(mem_file, 'w') - fp.close() +def parse_tortoise_memory_log(memfile_path): + f = open(memfile_path) + lines = f.readlines() + f.close() + + def line_2_dict(line): + line = line.split('\t') + ret = { 'mem1' : int(line[3].split(",")[0]), + 'mem2' : int(line[3].split(",")[1]), + 'est' : float(line[5]), + 'bibs' : int(line[7]) + } + return ret + + return map(line_2_dict, lines) + + +eps = 1e-6 +def is_eq(v1, v2): + return v1 + eps > v2 and v2 + eps > v1 diff --git a/modules/bibauthorid/lib/bibauthorid_least_squares.py b/modules/bibauthorid/lib/bibauthorid_least_squares.py index 054de7e91..178cabaf5 100644 --- a/modules/bibauthorid/lib/bibauthorid_least_squares.py +++ b/modules/bibauthorid/lib/bibauthorid_least_squares.py @@ -1,73 +1,91 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. import operator -from itertools import izip, starmap +from itertools import izip, starmap, repeat -def approximate(xs, ys): +def approximate(xs, ys, power): assert len(xs) == len(ys) + matrix_size = power + 1 + variables = 2 * power + 1 + xs = map(float, xs) ys = map(float, ys) - xs0 = [1] * len(xs) - xs1 = xs - xs2 = list(starmap(operator.mul, izip(xs, xs))) - xs3 = list(starmap(operator.mul, izip(xs, xs2))) - xs4 = starmap(operator.mul, izip(xs, xs3)) - - xs = [xs0, xs1, xs2, xs3, xs4] + xs = reduce(lambda x, y: x + [list(starmap(operator.mul, izip(x[-1], y)))], repeat(xs, variables - 1), [[1] * len(xs)]) + assert len(xs) == variables s = map(sum, xs) - assert s[0] == len(ys) - b = [sum(starmap(operator.mul, izip(ys, x))) for x in xs[:3]] - a = [s[i:i+3] for i in xrange(3)] + b = [sum(starmap(operator.mul, izip(ys, x))) for x in xs[:matrix_size]] + a = [s[i:i + matrix_size] for i in xrange(matrix_size)] # So, we have a*x = b and we are looking for x - matr = [ai + [bi] for ai, bi in izip(a, b)] def unify_row(i, j): - matr[i] = [matr[i][k] / matr[i][j] for k in xrange(len(matr[i]))] + matr[i] = [cell / matr[i][j] for cell in matr[i]] + assert matr[i][j] == 1 def subtract_row(i, j, row): assert matr[i][j] == 1 matr[row] = [matr[row][k] - matr[i][k] * matr[row][j] for k in xrange(len(matr[i]))] assert matr[row][j] == 0 - unify_row(0, 0) - subtract_row(0, 0, 1) - subtract_row(0, 0, 2) - unify_row(1, 1) - subtract_row(1, 1, 2) - unify_row(2, 2) - subtract_row(2, 2, 1) - subtract_row(2, 2, 0) - subtract_row(1, 1, 0) +# NOTE: Example for matrix_size = 3 +# unify_row(0, 0) +# subtract_row(0, 0, 1) +# subtract_row(0, 0, 2) +# unify_row(1, 1) +# subtract_row(1, 1, 2) +# unify_row(2, 2) +# subtract_row(2, 2, 1) +# subtract_row(2, 2, 0) +# subtract_row(1, 1, 0) + + for i in xrange(matrix_size): + unify_row(i, i) + for j in xrange(matrix_size - i - 1): + subtract_row(i, i, i + j + 1) + + for i in xrange(matrix_size): + for j in xrange(matrix_size - i - 1): + subtract_row(matrix_size - i - 1, matrix_size - i - 1, j) + + assert all(matr[i][:matrix_size] == ([0] * i) + [1] + ([0] * (matrix_size - 1 - i)) for i in xrange(matrix_size)) + + ret = map(operator.itemgetter(matrix_size), matr) + + return ret - assert matr[0][0:3] == [1, 0, 0] - assert matr[1][0:3] == [0, 1, 0] - assert matr[2][0:3] == [0, 0, 1] - return map(operator.itemgetter(3), matr) +def to_function(poly): + power = len(poly) - 1 + def func(x): + arr = [1.] + for i in xrange(power): + arr.append(arr[-1] * x) + assert len(arr) == len(poly) + return sum(p * x for p, x in izip(poly, arr)) + return func diff --git a/modules/bibauthorid/lib/bibauthorid_merge.py b/modules/bibauthorid/lib/bibauthorid_merge.py index e451bfaaa..3228d6935 100644 --- a/modules/bibauthorid/lib/bibauthorid_merge.py +++ b/modules/bibauthorid/lib/bibauthorid_merge.py @@ -1,152 +1,391 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. from operator import itemgetter from itertools import groupby, chain, imap, izip from bibauthorid_general_utils import update_status \ , update_status_final from bibauthorid_matrix_optimization import maximized_mapping from bibauthorid_backinterface import update_personID_canonical_names from bibauthorid_backinterface import get_existing_result_clusters from bibauthorid_backinterface import get_lastname_results from bibauthorid_backinterface import personid_name_from_signature from bibauthorid_backinterface import personid_from_signature from bibauthorid_backinterface import move_signature from bibauthorid_backinterface import get_claimed_papers from bibauthorid_backinterface import get_new_personid from bibauthorid_backinterface import find_conflicts +from bibauthorid_backinterface import get_free_pids from bibauthorid_backinterface import get_signature_info from bibauthorid_dbinterface import delete_empty_persons +from bibauthorid_dbinterface import get_bibrefrec_to_pid_flag_mapping -def merge(): +def merge_static(): ''' This function merges aidPERSONIDPAPERS with aidRESULTS. Use it after tortoise. + This function is static: if aid* tables are changed while it's running, + probably everything will crash and a black hole will open, eating all your data. + ''' + class Sig(object): + def __init__(self, bibrefrec, pid_flag): + self.rejected = dict(filter(lambda p: p[1] <= -2, pid_flag)) + self.assigned = filter(lambda p: -2 < p[1] and p[1] < 2, pid_flag) + self.claimed = filter(lambda p: 2 <= p[1], pid_flag) + self.bibrefrec = bibrefrec + + assert self.invariant() + + def invariant(self): + return len(self.assigned) + len(self.claimed) <= 1 + + def empty(self): + return not self.isclaimed() and not self.isassigned() + + def isclaimed(self): + return len(self.claimed) == 1 + + def get_claimed(self): + return self.claimed[0][0] + + def get_assigned(self): + return self.assigned[0][0] + + def isassigned(self): + return len(self.assigned) == 1 + + def isrejected(self, pid): + return pid in self.rejected + + def change_pid(self, pid): + assert self.invariant() + assert self.isassigned() + self.assigned = [(pid, 0)] + move_signature(self.bibrefrec, pid) + + class Cluster(object): + def __init__(self, pid, sigs): + self.pid = pid + + self.sigs = dict((sig.bibrefrec[2], sig) for sig in sigs if not sig.empty()) + + def send_sig(self, other, sig): + paper = sig.bibrefrec[2] + assert paper in self.sigs and paper not in other.sigs + + del self.sigs[paper] + other.sigs[paper] = sig + + if sig.isassigned(): + sig.change_pid(other.pid) + + last_names = frozenset(name[0].split('.')[0] for name in get_existing_result_clusters()) + + personid = get_bibrefrec_to_pid_flag_mapping() + free_pids = get_free_pids() + + for idx, last in enumerate(last_names): + update_status(float(idx) / len(last_names), "Merging, %d/%d current: %s" % (idx, len(last_names), last)) + + results = ((int(row[0].split(".")[1]), row[1:4]) for row in get_lastname_results(last)) + + # [(last name number, [bibrefrecs])] + results = [(k, map(itemgetter(1), d)) for k, d in groupby(sorted(results, 
key=itemgetter(0)), key=itemgetter(0))] + + # List of dictionaries. + # [{new_pid -> N}] + matr = [] + + # Set of all old pids. + old_pids = set() + + for k, ds in results: + pids = [] + for d in ds: + pid_flag = filter(lambda x: x[1] > -2, personid.get(d, [])) + if pid_flag: + assert len(pid_flag) == 1 + pid = pid_flag[0][0] + pids.append(pid) + old_pids.add(pid) + + matr.append(dict((k, len(list(d))) for k, d in groupby(sorted(pids)))) + + old_pids = list(old_pids) + best_match = maximized_mapping([[row.get(old, 0) for old in old_pids] for row in matr]) + + # [[bibrefrecs] -> pid] + matched_clusters = [(results[new_idx][1], old_pids[old_idx]) for new_idx, old_idx, unused in best_match] + not_matched_clusters = frozenset(xrange(len(results))) - frozenset(imap(itemgetter(0), best_match)) + not_matched_clusters = izip((results[i][1] for i in not_matched_clusters), free_pids) + + # pid -> Cluster + clusters = dict((pid, Cluster(pid, [Sig(bib, personid.get(bib, [])) for bib in sigs])) + for sigs, pid in chain(matched_clusters, not_matched_clusters)) + + todo = clusters.items() + for pid, clus in todo: + assert clus.pid == pid + + for paper, sig in clus.sigs.items(): + if sig.isclaimed(): + if sig.get_claimed() != pid: + target_clus = clusters[sig.get_claimed()] + + if paper in target_clus.sigs: + new_clus = Cluster(free_pids.next(), []) + target_clus.send_sig(new_clus, target_clus.sigs[paper]) + todo.append(new_clus) + clusters[new_clus.pid] = new_clus + + assert paper not in target_clus.sigs + clus.send_sig(target_clus, sig) + elif sig.get_assigned() != pid: + if not sig.isrejected(pid): + move_signature(sig.bibrefrec, pid) + else: + move_signature(sig.bibrefrec, free_pids.next()) + else: + assert not sig.isrejected(pid) + + update_status_final("Merging done.") + + update_status_final() + delete_empty_persons() + update_personID_canonical_names() + +def merge_static_oldstyle(): + ''' + This function merges aidPERSONIDPAPERS with aidRESULTS. + Use it after tortoise. + This function is static: if aid* tables are changed while it's running, + probably everything will crash and a black hole will open, eating all your data. 
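+    In outline, this is a small assignment problem: for every last name, each
+    result cluster is scored against every existing personid by the number of
+    shared signatures, maximized_mapping() picks the best cluster-to-personid
+    matching, and clusters left unmatched receive fresh personids.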
+ ''' + last_names = frozenset(name[0].split('.')[0] for name in get_existing_result_clusters()) + + def get_free_pids(): + while True: + yield get_new_personid() + + free_pids = get_free_pids() + + current_mapping = get_bibrefrec_to_pid_flag_mapping() + + def move_sig_and_update_mapping(sig, old_pid_flag, new_pid_flag): + move_signature(sig, new_pid_flag[0]) + current_mapping[sig].remove(old_pid_flag) + current_mapping[sig].append(new_pid_flag) + + def try_move_signature(sig, target_pid): + """ + Try to move signature sig to personid target_pid, falling back to a + fresh personid when rejections or an existing claim conflict forbid it. + """ + paps = current_mapping[sig] + rejected = filter(lambda p: p[1] <= -2, paps) + assigned = filter(lambda p:-2 < p[1] and p[1] < 2, paps) + claimed = filter(lambda p: 2 <= p[1] and p[0] == target_pid, paps) + + if claimed or not assigned or assigned[0][0] == target_pid: + return + + assert len(assigned) == 1 + + if rejected: + newpid = free_pids.next() + move_sig_and_update_mapping(sig, assigned[0], (newpid, assigned[0][1])) + else: + conflicts = find_conflicts(sig, target_pid) + if not conflicts: + move_sig_and_update_mapping(sig, assigned[0], (target_pid, assigned[0][1])) + else: + assert len(conflicts) == 1 + if conflicts[0][3] == 2: + newpid = free_pids.next() + move_sig_and_update_mapping(sig, assigned[0], (newpid, assigned[0][1])) + else: + newpid = free_pids.next() + csig = tuple(conflicts[0][:3]) + move_sig_and_update_mapping(csig, (target_pid, conflicts[0][3]), (newpid, conflicts[0][3])) + move_sig_and_update_mapping(sig, assigned[0], (target_pid, assigned[0][1])) + + for idx, last in enumerate(last_names): + update_status(float(idx) / len(last_names), "%d/%d current: %s" % (idx, len(last_names), last)) + + results = ((int(row[0].split(".")[1]), row[1:4]) for row in get_lastname_results(last)) + + # [(last name number, [bibrefrecs])] + results = [(k, map(itemgetter(1), d)) for k, d in groupby(sorted(results, key=itemgetter(0)), key=itemgetter(0))] + + # List of dictionaries. + # [{new_pid -> N}] + matr = [] + + # Set of all old pids. + old_pids = set() + + for k, ds in results: + pids = [] + claim = [] + for d in ds: + pid_flag = current_mapping.get(d, []) + if pid_flag: + pid, flag = pid_flag[0] + pids.append(pid) + old_pids.add(pid) + if flag > 1: + claim.append((d, pid)) + + matr.append(dict((k, len(list(d))) for k, d in groupby(sorted(pids)))) + + # We cast it to list in order to ensure the order persistence. + old_pids = list(old_pids) + best_match = maximized_mapping([[row.get(old, 0) for old in old_pids] for row in matr]) + + matched_clusters = [(results[new_idx][1], old_pids[old_idx]) for new_idx, old_idx, unused in best_match] + not_matched_clusters = frozenset(xrange(len(results))) - frozenset(imap(itemgetter(0), best_match)) + not_matched_clusters = izip((results[i][1] for i in not_matched_clusters), free_pids) + + for sigs, pid in chain(matched_clusters, not_matched_clusters): + for sig in sigs: + if sig in current_mapping: + if not pid in map(itemgetter(0), filter(lambda x: x[1] > -2, current_mapping[sig])): + try_move_signature(sig, pid) + + update_status_final() + delete_empty_persons() + update_personID_canonical_names() + +def merge_dynamic(): + ''' + This function merges aidPERSONIDPAPERS with aidRESULTS. + Use it after tortoise. + This function is dynamic: it allows aid* tables to be changed while it is still running, + hence the claiming facility for example can stay online during the merge. This comfort + however is paid for in terms of speed. 
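+    Unlike the static variants, which work on a single snapshot from
+    get_bibrefrec_to_pid_flag_mapping(), this one re-reads every signature
+    with get_signature_info() at the moment it is inspected, so concurrent
+    claims are honoured at the price of many more queries.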
''' last_names = frozenset(name[0].split('.')[0] for name in get_existing_result_clusters()) def get_free_pids(): while True: yield get_new_personid() free_pids = get_free_pids() def try_move_signature(sig, target_pid): """ Try to move signature sig to personid target_pid, falling back to a fresh personid when rejections or an existing claim conflict forbid it. """ paps = get_signature_info(sig) - claimed = filter(lambda p: p[1] <= -2, paps) + rejected = filter(lambda p: p[1] <= -2, paps) assigned = filter(lambda p:-2 < p[1] and p[1] < 2, paps) - rejected = filter(lambda p: 2 <= p[1] and p[0] == target_pid, paps) + claimed = filter(lambda p: 2 <= p[1] and p[0] == target_pid, paps) if claimed or not assigned or assigned[0][0] == target_pid: return assert len(assigned) == 1 if rejected: move_signature(sig, free_pids.next()) else: conflicts = find_conflicts(sig, target_pid) if not conflicts: move_signature(sig, target_pid) else: assert len(conflicts) == 1 if conflicts[0][3] == 2: move_signature(sig, free_pids.next()) else: move_signature(conflicts[0][:3], free_pids.next()) move_signature(sig, target_pid) for idx, last in enumerate(last_names): update_status(float(idx) / len(last_names), "%d/%d current: %s" % (idx, len(last_names), last)) results = ((int(row[0].split(".")[1]), row[1:4]) for row in get_lastname_results(last)) # [(last name number, [bibrefrecs])] results = [(k, map(itemgetter(1), d)) for k, d in groupby(sorted(results, key=itemgetter(0)), key=itemgetter(0))] # List of dictionaries. # [{new_pid -> N}] matr = [] # Set of all old pids. old_pids = set() for k, ds in results: pids = [] claim = [] for d in ds: pid_flag = personid_from_signature(d) if pid_flag: pid, flag = pid_flag[0] pids.append(pid) old_pids.add(pid) if flag > 1: claim.append((d, pid)) matr.append(dict((k, len(list(d))) for k, d in groupby(sorted(pids)))) # We cast it to list in order to ensure the order persistence. old_pids = list(old_pids) best_match = maximized_mapping([[row.get(old, 0) for old in old_pids] for row in matr]) matched_clusters = [(results[new_idx][1], old_pids[old_idx]) for new_idx, old_idx, unused in best_match] not_matched_clusters = frozenset(xrange(len(results))) - frozenset(imap(itemgetter(0), best_match)) not_matched_clusters = izip((results[i][1] for i in not_matched_clusters), free_pids) for sigs, pid in chain(matched_clusters, not_matched_clusters): for sig in sigs: try_move_signature(sig, pid) update_status_final() delete_empty_persons() update_personID_canonical_names() def matched_claims(inspect=None): ''' Checks how many claims are violated in aidRESULTS. Returns the number of preserved and the total number of claims. 
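    Usage sketch (numbers illustrative):

        preserved, total = matched_claims()
        print "%d out of %d claims preserved" % (preserved, total)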
''' last_names = frozenset(name[0].split('.')[0] for name in get_existing_result_clusters()) r_match = 0 r_total = 0 for lname in last_names: if inspect and lname != inspect: continue results_dict = dict(((row[1], row[2], row[3]), int(row[0].split(".")[1])) for row in get_lastname_results(lname)) results_clusters = max(results_dict.values()) + 1 assert frozenset(results_dict.values()) == frozenset(range(results_clusters)) pids = frozenset(x[0] for x in chain.from_iterable(personid_name_from_signature(r) for r in results_dict.keys())) matr = ((results_dict[x] for x in get_claimed_papers(pid) if x in results_dict) for pid in pids) matr = (dict((k, len(list(d))) for k, d in groupby(sorted(row))) for row in matr) matr = [[row.get(i, 0) for i in xrange(results_clusters)] for row in matr] r_match += sum(m[2] for m in maximized_mapping(matr)) r_total += sum(sum(row) for row in matr) return r_match, r_total diff --git a/modules/bibauthorid/lib/bibauthorid_name_utils.py b/modules/bibauthorid/lib/bibauthorid_name_utils.py index 4041bd0a6..59776b6d9 100644 --- a/modules/bibauthorid/lib/bibauthorid_name_utils.py +++ b/modules/bibauthorid/lib/bibauthorid_name_utils.py @@ -1,789 +1,774 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' bibauthorid_name_utils Bibauthorid utilities used by many parts of the framework ''' import re import bibauthorid_config as bconfig from bibauthorid_string_utils import string_partition from copy import deepcopy from bibauthorid_general_utils import name_comparison_print try: from invenio.config import CFG_ETCDIR NO_CFG_ETCDIR = False except ImportError: NO_CFG_ETCDIR = True try: from editdist import distance except ImportError: try: from Levenshtein import distance except ImportError: name_comparison_print("Levenshtein Module not available!") def distance(s1, s2): d = {} lenstr1 = len(s1) lenstr2 = len(s2) for i in xrange(-1, lenstr1 + 1): d[(i, -1)] = i + 1 for j in xrange(-1, lenstr2 + 1): d[(-1, j)] = j + 1 for i in xrange(0, lenstr1): for j in xrange(0, lenstr2): if s1[i] == s2[j]: cost = 0 else: cost = 1 d[(i, j)] = min( d[(i - 1, j)] + 1, # deletion d[(i, j - 1)] + 1, # insertion d[(i - 1, j - 1)] + cost, # substitution ) if i > 1 and j > 1 and s1[i] == s2[j - 1] and s1[i - 1] == s2[j]: d[(i, j)] = min (d[(i, j)], d[i - 2, j - 2] + cost) # transposition return d[lenstr1 - 1, lenstr2 - 1] +artifact_removal = re.compile("[^a-zA-Z0-9]") # Gender names and name variation files are loaded upon module import to increase performance def split_name_parts(name_string, delete_name_additions=True, override_surname_sep='', return_all_lower=False): ''' Splits name_string into a surname string and lists of initials (without trailing dot), first names, and the positions of the full first names among the initials. RETURNS a list containing a string and three lists. 
delete_name_additions defines if extensions e.g. Jr., (Ed.) or (spokesperson) will be ignored @param name_string: the name to be split @type name_string: string @param delete_name_additions: determines whether to delete name additions @type delete_name_additions: boolean @param override_surname_sep: Define alternative surname separator @type override_surname_sep: string @param return_all_lower: if True, return surname, initials and names in lower case @type return_all_lower: boolean @return: list of [surname string, initials list, names list, positions list] e.g. split_name_parts("Ellis, John R.") --> ['Ellis', ['J', 'R'], ['John'], [0]] split_name_parts("Ellis, K. John Rob") --> ['Ellis', ['K', 'J', 'R'], ['John', 'Rob'], [1,2]] @rtype: list of lists ''' if not override_surname_sep: surname_separators = bconfig.SURNAMES_SEPARATOR_CHARACTER_LIST else: surname_separators = ',' name_separators = bconfig.NAMES_SEPARATOR_CHARACTER_LIST if name_separators == "-1": name_separators = ',;.=\-\(\)' if delete_name_additions: name_additions = re.findall('\([.]*[^\)]*\)', name_string) for name_addition in name_additions: name_string = name_string.replace(name_addition, '') surname = "" rest_of_name = "" found_sep = '' name_string = name_string.strip() for sep in surname_separators: if name_string.count(sep) >= 1: found_sep = sep surname, rest_of_name = string_partition(name_string, sep)[0::2] surname = surname.strip().capitalize() break if not found_sep: if name_string.count(" ") > 0: rest_of_name, surname = string_partition(name_string, ' ', direc='r')[0::2] surname = surname.strip().capitalize() else: return [name_string.strip().capitalize(), [], [], []] if rest_of_name.count(","): rest_of_name = string_partition(rest_of_name, ",")[0] substitution_regexp = re.compile('[%s]' % (name_separators)) initials_names_list = substitution_regexp.sub(' ', rest_of_name).split() names = [] initials = [] positions = [] pos_counter = 0 for i in initials_names_list: if len(i) == 1: initials.append(i.capitalize()) pos_counter += 1 else: names.append(i.strip().capitalize()) initials.append(i[0].capitalize()) positions.append(pos_counter) pos_counter += 1 retval = [surname, initials, names, positions] if return_all_lower: retval = [surname.lower(), [i.lower() for i in initials], [n.lower() for n in names], positions] return retval def create_canonical_name(name): canonical_name = create_unified_name(name, reverse=True) artifact_removal = re.compile("[^a-zA-Z0-9]") whitespace_removal = re.compile("[ ]{1,10}") canonical_name = artifact_removal.sub(" ", canonical_name) canonical_name = whitespace_removal.sub(" ", canonical_name) canonical_name = canonical_name.strip().replace(" ", ".") return canonical_name def create_normalized_name(splitted_name): ''' Creates a normalized name from a given name array. A normalized name looks like "Lastname, Firstnames and Initials" @param splitted_name: name array from split_name_parts @type splitted_name: list in form [string, list, list] @return: normalized name @rtype: string ''' name = splitted_name[0] + ',' if not splitted_name[1] and not splitted_name[2]: return name for i in range(len(splitted_name[1])): try: fname = splitted_name[2][splitted_name[3].index(i)] name = name + ' ' + fname except (IndexError, ValueError): name = name + ' ' + splitted_name[1][i] + '.' return name def create_unified_name(name, reverse=False): ''' Creates unified name. E.g. Ellis, John Richard T. (Jr.) will become Ellis, J. R. T. 
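    With reverse=True the initials come first, e.g. 'J. R. T. Ellis'.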
@param name: The name to be unified @type name: string @param reverse: if true, names come first @return: The unified name @rtype: string ''' split_name = split_name_parts(name) if reverse: unified_name = '' for i in split_name[1]: unified_name += "%s. " % (i) unified_name += "%s" % (split_name[0]) else: unified_name = "%s, " % (split_name[0]) for i in split_name[1]: unified_name += "%s. " % (i) if unified_name.count("ollabo"): unified_name = unified_name.replace("ollaborations", "ollaboration") unified_name = unified_name.replace("The ", "") unified_name = unified_name.replace("the ", "") unified_name = unified_name.replace("For ", "") unified_name = unified_name.replace("for ", "") return unified_name def clean_name_string(namestring, replacement=" ", keep_whitespace=True, trim_whitespaces=False): ''' remove specific artifacts from the names in order to be able to compare them. E.g. 't Hooft, G. and t'Hooft, G. @param namestring: the string to be cleaned @type namestring: string ''' # artifact_removal = re.compile("['`\-\[\]\_\"]") artifact_removal = None if trim_whitespaces: namestring = namestring.strip() if keep_whitespace: artifact_removal = re.compile("[^a-zA-Z0-9,.\s]") else: artifact_removal = re.compile("[^a-zA-Z0-9,.]") whitespace_removal = re.compile("[\s]{2,100}") tmp = artifact_removal.sub(replacement, namestring) return whitespace_removal.sub(" ", tmp).strip() def soft_compare_names(origin_name, target_name): ''' Soft comparison of names, to use in search engines and similar Base results: If surname is equal in [0.6,1.0] If surname similar in [0.4,0.8] If surname differs in [0.0,0.4] all depending on average compatibility of names and initials. ''' jaro_fctn = distance # try: # from Levenshtein import jaro_winkler # jaro_fctn = jaro_winkler # except ImportError: # jaro_fctn = jaro_winkler_str_similarity score = 0.0 oname = deepcopy(origin_name) tname = deepcopy(target_name) orig_name = split_name_parts(oname.lower()) targ_name = split_name_parts(tname.lower()) orig_name[0] = clean_name_string(orig_name[0], replacement="", keep_whitespace=False) targ_name[0] = clean_name_string(targ_name[0], replacement="", keep_whitespace=False) if orig_name[0] == targ_name[0]: score += 0.6 else: if ((jaro_fctn(orig_name[0].lower(), targ_name[0].lower()) < .95) or min(len(orig_name[0]), len(targ_name[0])) <= 4): score += 0.0 else: score += 0.4 if orig_name[1] and targ_name[1]: max_initials = max(len(orig_name[1]), len(targ_name[1])) matching_i = 0 if len(orig_name[1]) >= 1 and len(targ_name[1]) >= 1: for i in orig_name[1]: if i in targ_name[1]: matching_i += 1 max_names = max(len(orig_name[2]), len(targ_name[2])) matching_n = 0 if len(orig_name[2]) >= 1 and len(targ_name[2]) >= 1: cleaned_targ_name = [clean_name_string(i, replacement="", keep_whitespace=False) for i in targ_name[2]] for i in orig_name[2]: if clean_name_string(i, replacement="", keep_whitespace=False) in cleaned_targ_name: matching_n += 1 name_score = (matching_i + matching_n) * 0.4 / (max_names + max_initials) score += name_score return score def create_name_tuples(names): ''' Find name combinations, i.e. 
permutations of the names in different positions of the name @param names: a list of names @type names: list of string @return: the combinations of the names given @rtype: list of lists of strings ''' length = float(len(names)) max_tuples = int((length / 2) * (length - 1)) current_tuple = 1 pos = 0 off = 1 variants = [" ".join(names)] for i in range(max_tuples): variant = "%s %s %s" % (' '.join(names[0:pos]), ''.join(names[pos:off + 1]).capitalize(), ' '.join(names[off + 1::])) variants.append(variant.strip()) pos += 1 off += 1 if off >= length: pos = i * 0 off = current_tuple + 1 current_tuple += 1 return variants def full_names_are_equal_composites(name1, name2): ''' Checks if names are equal composites; e.g. "guangsheng" vs. "guang sheng" @param name1: Full Name string of the first name (w/ last name) @type name1: string @param name2: Full Name string of the second name (w/ last name) @type name2: string @return: Are the names equal composites? @rtype: boolean ''' if not isinstance(name1, list): name1 = split_name_parts(name1) if not isinstance(name2, list): name2 = split_name_parts(name2) is_equal_composite = False oname_variations = create_name_tuples(name1[2]) tname_variations = create_name_tuples(name2[2]) for oname_variation in oname_variations: for tname_variation in tname_variations: oname = clean_name_string(oname_variation.lower(), "", False, True) tname = clean_name_string(tname_variation.lower(), "", False, True) if oname == tname: is_equal_composite = True break return is_equal_composite -def names_are_equal_gender(name1, name2, gendernames): - ''' - Checks if names have the same gender - @param gendernames: dictionary male/female names - ''' - g1 = [name1 in gendernames['boys'], name1 in gendernames['girls']] - g2 = [name2 in gendernames['boys'], name2 in gendernames['girls']] - - if (g1[0] == g2[0] == True) and (g1[1] == False or g2[1] == False): - return True - if (g1[1] == g2[1] == True) and (g1[0] == False or g2[0] == False): - return True - return False - def full_names_are_equal_gender(name1, name2, gendernames): ''' Checks gender equality of two first names based on a word list @param name1: Full Name string of the first name (w/ last name) @type name1: string @param name2: Full Name string of the second name (w/ last name) @type name2: string @param gendernames: dictionary of male/female names @type gendernames: dict @return: Are names gender-equal? 
@rtype: boolean ''' if not isinstance(name1, list): name1 = split_name_parts(name1) if not isinstance(name2, list): name2 = split_name_parts(name2) names_are_equal_gender_b = True ogender = None tgender = None # oname = name1[2][0].lower() # tname = name2[2][0].lower() # oname = clean_name_string(oname, "", False, True) # tname = clean_name_string(tname, "", False, True) onames = [clean_name_string(n.lower(), "", False, True) for n in name1[2]] tnames = [clean_name_string(n.lower(), "", False, True) for n in name2[2]] for oname in onames: if oname in gendernames['boys']: if ogender != 'Conflict': if ogender != 'Female': ogender = 'Male' else: ogender = 'Conflict' elif oname in gendernames['girls']: if ogender != 'Conflict': if ogender != 'Male': ogender = 'Female' else: ogender = 'Conflict' for tname in tnames: if tname in gendernames['boys']: if tgender != 'Conflict': if tgender != 'Female': tgender = 'Male' else: tgender = 'Conflict' elif tname in gendernames['girls']: if tgender != 'Conflict': if tgender != 'Male': tgender = 'Female' else: tgender = 'Conflict' if ogender and tgender: if ogender != tgender or ogender == 'Conflict' or tgender == 'Conflict': names_are_equal_gender_b = False return names_are_equal_gender_b def names_are_synonymous(name1, name2, name_variations): ''' Checks if names are synonims @param name_variations: name variations list @type name_variations: list of lists ''' a = [name1 in nvar and name2 in nvar for nvar in name_variations] if True in a: return True return False def full_names_are_synonymous(name1, name2, name_variations): ''' Checks if two names are synonymous; e.g. "Robert" vs. "Bob" @param name1: Full Name string of the first name (w/ last name) @type name1: string @param name2: Full Name string of the second name (w/ last name) @type name2: string @param name_variations: name variations list @type name_variations: list of lists @return: are names synonymous @rtype: boolean ''' if not isinstance(name1, list): name1 = split_name_parts(name1) if not isinstance(name2, list): name2 = split_name_parts(name2) names_are_synonymous_b = False max_matches = min(len(name1[2]), len(name2[2])) matches = [] for i in xrange(max_matches): matches.append(False) for nvar in name_variations: for i in xrange(max_matches): oname = name1[2][i].lower() tname = name2[2][i].lower() oname = clean_name_string(oname, "", False, True) tname = clean_name_string(tname, "", False, True) if (oname in nvar and tname in nvar) or oname == tname: name_comparison_print(' ', oname, ' and ', tname, ' are synonyms!') matches[i] = True if sum(matches) == max_matches: names_are_synonymous_b = True break return names_are_synonymous_b def names_are_substrings(name1, name2): ''' Checks if the names are subtrings of each other, left to right @return: bool ''' return name1.startswith(name2) or name2.startswith(name1) def full_names_are_substrings(name1, name2): ''' Checks if two names are substrings of each other; e.g. "Christoph" vs. "Ch" Only checks for the beginning of the names. 
@param name1: Full Name string of the first name (w/ last name) @type name1: string @param name2: Full Name string of the second name (w/ last name) @type name2: string @return: are names synonymous @rtype: boolean ''' if not isinstance(name1, list): name1 = split_name_parts(name1) if not isinstance(name2, list): name2 = split_name_parts(name2) onames = name1[2] tnames = name2[2] # oname = "".join(onames).lower() # tname = "".join(tnames).lower() names_are_substrings_b = False for o in onames: oname = clean_name_string(o.lower(), "", False, True) for t in tnames: tname = clean_name_string(t.lower(), "", False, True) if (oname.startswith(tname) or tname.startswith(oname)): names_are_substrings_b = True return names_are_substrings_b def _load_gender_firstnames_dict(files=''): if not NO_CFG_ETCDIR and not files: files = {'boy': CFG_ETCDIR + '/bibauthorid/name_authority_files/male_firstnames.txt', 'girl': CFG_ETCDIR + '/bibauthorid/name_authority_files/female_firstnames.txt'} elif NO_CFG_ETCDIR and not files: files = {'boy': '../etc/name_authority_files/male_firstnames.txt', 'girl': '../etc/name_authority_files/female_firstnames.txt'} boyf = open(files['boy'], 'r') boyn = set([x.strip().lower() for x in boyf.readlines()]) boyf.close() girlf = open(files['girl'], 'r') girln = set([x.strip().lower() for x in girlf.readlines()]) girlf.close() - return {'boys':list(boyn - girln), 'girls':list(girln - boyn)} + return {'boys':(boyn - girln), 'girls':(girln - boyn)} def _load_firstname_variations(filename=''): #will load an array of arrays: [['rick','richard','dick'],['john','jhonny']] if not NO_CFG_ETCDIR and not filename: filename = CFG_ETCDIR + '/bibauthorid/name_authority_files/name_variants.txt' elif NO_CFG_ETCDIR and not filename: filename = '../etc/name_authority_files/name_variants.txt' retval = [] r = re.compile("\n") fp = open(filename) for l in fp.readlines(): lr = r.sub("", l) retval.append([clean_name_string(name.lower(), "", False, True) for name in lr.split(";") if name]) fp.close() return retval def compare_names(origin_name, target_name, initials_penalty=False): ''' Compare two names. 
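
    Returns a score in [0.0, 1.0]: the higher the score, the more likely
    it is that the two name strings refer to the same person.

    Illustrative examples (a hedged sketch only -- the exact values depend
    on the gender and name-variation word lists loaded at module import):

        compare_names('Ellis, John R.', 'Ellis, J.R.')  # close to 1.0
        compare_names('Ellis, John R.', 'Smith, J.')    # 0.0: surname distance
                                                        # exceeds MAX_ALLOWED_SURNAME_DISTANCE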
    '''
    MAX_ALLOWED_SURNAME_DISTANCE = 2
    name_comparison_print("\nComparing: " , origin_name, ' ', target_name)
    gendernames = GLOBAL_gendernames
    name_variations = GLOBAL_name_variations

    no = split_name_parts(origin_name, True, "", True)
    nt = split_name_parts(target_name, True, "", True)

    name_comparison_print("|- split no: ", no)
    name_comparison_print("|- split nt: ", nt)

    score = 0.0

    surname_dist = distance(no[0], nt[0])
    name_comparison_print("|- surname distance: ", surname_dist)

    if surname_dist > 0:
        artifact_removal = re.compile("[^a-zA-Z0-9]")
        fn1 = artifact_removal.sub("", no[0])
        fn2 = artifact_removal.sub("", nt[0])

        if fn1 == fn2:
            score = 1.0
        else:
            score = max(0.0, 0.5 - (float(surname_dist) / float(MAX_ALLOWED_SURNAME_DISTANCE)))
    else:
        score = 1.0
    name_comparison_print('||- surname score: ', score)

    initials_only = ((min(len(no[2]), len(nt[2]))) == 0)
    only_initials_available = False
    if len(no[2]) == len(nt[2]) and initials_only:
        only_initials_available = True

    name_comparison_print('|- initials only: ', initials_only)
    name_comparison_print('|- only initials available: ', only_initials_available)

    names_are_equal_composites = False
    if not initials_only:
        names_are_equal_composites = full_names_are_equal_composites(origin_name, target_name)
    name_comparison_print("|- equal composites: ", names_are_equal_composites)

    max_n_initials = max(len(no[1]), len(nt[1]))
    initials_intersection = set(no[1]).intersection(set(nt[1]))
    n_initials_intersection = len(initials_intersection)
    initials_union = set(no[1]).union(set(nt[1]))
    n_initials_union = len(initials_union)

    initials_distance = distance("".join(no[1]), "".join(nt[1]))
    if n_initials_union > 0:
        initials_c = float(n_initials_intersection) / float(n_initials_union)
    else:
        initials_c = 1

    if len(no[1]) > len(nt[1]):
        alo = no[1]
        alt = nt[1]
    else:
        alo = nt[1]
        alt = no[1]
    lo = len(alo)
    lt = len(alt)
    if max_n_initials > 0:
        initials_screwup = sum([i + 1 for i, k in enumerate(reversed(alo))
                                if lo - 1 - i < lt and k != alt[lo - 1 - i] ]) / \
                           float(float(max_n_initials * (max_n_initials + 1)) / 2)
        initials_distance = initials_distance / max_n_initials
    else:
        initials_screwup = 0
        initials_distance = 0

    score = score - (0.75 * initials_screwup + 0.10 * (1 - initials_c)\
            + 0.15 * initials_distance) * (score)
    name_comparison_print("|- initials sets: ", no[1], " ", nt[1])
    name_comparison_print("|- initials distance: ", initials_distance)
    name_comparison_print("|- initials c: ", initials_c)
    name_comparison_print("|- initials screwup: ", initials_screwup)
    name_comparison_print("||- initials score: ", score)

    composits_eq = full_names_are_equal_composites(no, nt)
    if len(no[2]) > 0 and len(nt[2]) > 0:
        gender_eq = full_names_are_equal_gender(no, nt, gendernames)
    else:
        gender_eq = True
    vars_eq = full_names_are_synonymous(no, nt, name_variations)
    substr_eq = full_names_are_substrings(no, nt)

    if not initials_only:
        if len(no[2]) > len(nt[2]):
            nalo = no[2]
            nalt = nt[2]
        else:
            nalo = nt[2]
            nalt = no[2]
        nlo = len(nalo)
        nlt = len(nalt)
        names_screwup_list = [(distance(k, nalt[nlo - 1 - i]), max(len(k), len(nalt[nlo - 1 - i])))
                              for i, k in enumerate(reversed(nalo)) \
                              if nlo - 1 - i < nlt]
        max_names_screwup = max([float(i[0]) / i[1] for i in names_screwup_list])
        avg_names_screwup = sum([float(i[0]) / i[1] for i in names_screwup_list])\
                            / len(names_screwup_list)
    else:
        max_names_screwup = 0
        avg_names_screwup = 0

    score = score - score * 0.75 * max_names_screwup - score * 0.25 * avg_names_screwup
    name_comparison_print("|- max names screwup: ", max_names_screwup)
    name_comparison_print("|- avg screwup: ", avg_names_screwup)
    name_comparison_print("||- names score: ", score)
    name_comparison_print("|- names composites: ", composits_eq)
    name_comparison_print("|- same gender: ", gender_eq)
    name_comparison_print("|- synonyms: ", vars_eq)
    name_comparison_print("|- substrings: ", substr_eq)

    if vars_eq:
        synmap = [[i, j, names_are_synonymous(i, j, name_variations)] for i in no[2] for j in nt[2]]
        synmap = [i for i in synmap if i[2] == True]
        name_comparison_print("|-- synmap: ", synmap)
        for i in synmap:
            if no[2].index(i[0]) == nt[2].index(i[1]):
                score = score + (1 - score) * 0.5
            else:
                score = score + (1 - score) * 0.15
    else:
        name_comparison_print("|-- synmap: empty")
    name_comparison_print("|-- synmap score: ", score)

    if substr_eq and not initials_only:
        ssmap = [[i, j, names_are_substrings(i, j)] for i in no[2] for j in nt[2]]
        ssmap = [i for i in ssmap if i[2] == True]
        name_comparison_print("|-- substr map: ", ssmap)
        for i in ssmap:
            if no[2].index(i[0]) == nt[2].index(i[1]):
                score = score + (1 - score) * 0.2
            else:
                score = score + (1 - score) * 0.05
    else:
        name_comparison_print("|-- substr map: empty")
    name_comparison_print("|-- substring score: ", score)

    if composits_eq and not initials_only:
        name_comparison_print("|-- composite names")
        score = score + (1 - score) * 0.2
    else:
        name_comparison_print("|-- not composite names")
    name_comparison_print("|-- composite score: ", score)

    if not gender_eq:
        score = score / 3.
        name_comparison_print("|-- apply gender penalty")
    else:
        name_comparison_print("|-- no gender penalty")
    name_comparison_print("|-- gender score: ", score)

    if surname_dist > MAX_ALLOWED_SURNAME_DISTANCE:
        score = 0.0
        name_comparison_print("|- surname trim: ", score)
    else:
        name_comparison_print("|- no surname trim: ", score)

    if initials_only and (not only_initials_available or initials_penalty):
        score = score * .9
        name_comparison_print("|- initials only penalty: ", score, initials_only, only_initials_available)
    else:
        name_comparison_print("|- no initials only penalty", initials_only, only_initials_available)

    name_comparison_print("||- final score: ", score)
-
    return score


def generate_last_name_cluster_str(name):
    '''
    Use this function to find the last name cluster
    this name should be associated with.
    '''
    family = split_name_parts(name.decode('utf-8'))[0]
-
    artifact_removal = re.compile("[^a-zA-Z0-9]")
    return artifact_removal.sub("", family).lower()

GLOBAL_gendernames = _load_gender_firstnames_dict()
GLOBAL_name_variations = _load_firstname_variations()

diff --git a/modules/bibauthorid/lib/bibauthorid_personid_maintenance.py b/modules/bibauthorid/lib/bibauthorid_personid_maintenance.py
index 645279461..d78083418 100644
--- a/modules/bibauthorid/lib/bibauthorid_personid_maintenance.py
+++ b/modules/bibauthorid/lib/bibauthorid_personid_maintenance.py
@@ -1,104 +1,105 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ aidPersonID maintenance algorithms. """ from bibauthorid_name_utils import split_name_parts from bibauthorid_name_utils import create_normalized_name from bibauthorid_backinterface import get_name_by_bibrecref from bibauthorid_backinterface import copy_personids #emitting from bibauthorid_backinterface import compare_personid_tables #emitting from bibauthorid_backinterface import group_personid from bibauthorid_backinterface import check_personid_papers #emitting from bibauthorid_backinterface import personid_get_recids_affected_since as get_recids_affected_since #emitting from bibauthorid_backinterface import repair_personid #emitting from bibauthorid_backinterface import check_results #emitting from bibauthorid_backinterface import check_merger #emitting +from bibauthorid_backinterface import restore_personids #emitting def convert_personid(): from dbquery import run_sql # oh come on, the whole function will be removed soon from itertools import repeat chunk = 1000 old_personid = run_sql("SELECT `personid`, `tag`, `data`, `flag`, `lcul` FROM `aidPERSONID`") def flush_papers(args): run_sql("INSERT INTO `aidPERSONIDPAPERS` " "(`personid`, " " `bibref_table`, " " `bibref_value`, " " `bibrec`, " " `name`, " " `flag`, " " `lcul`) " "VALUES " + " , ".join(repeat("(%s, %s, %s, %s, %s, %s, %s)", len(args) / 7)) , tuple(args)) def flush_data(args): run_sql("INSERT INTO `aidPERSONIDDATA` " "(`personid`, " " `tag`, " " `data`, " " `opt1`, " " `opt2`) " "VALUES " + " , ".join(repeat("(%s, %s, %s, %s, %s)", len(args) / 5)) , tuple(args)) paper_args = [] data_args = [] for row in old_personid: if row[1] == 'paper': bibref, rec = row[2].split(',') tab, ref = bibref.split(':') try: name = get_name_by_bibrecref((int(tab), int(ref), int(rec))) except: continue name = split_name_parts(name) name = create_normalized_name(name) paper_args += [row[0], tab, ref, rec, name, row[3], row[4]] if len(paper_args) > chunk: flush_papers(paper_args) paper_args = [] elif row[1] == 'gathered_name': continue else: data_args += list(row) if len(data_args) > chunk: flush_data(data_args) data_args = [] if paper_args: flush_papers(paper_args) if data_args: flush_data(data_args) def compare_personids(path): ''' Use this function with copy_personids() to diff personids. ''' fp = open(path, "w") pid1_p, pid1_d = group_personid("aidPERSONIDPAPERS_copy", "aidPERSONIDDATA_copy") pid2_p, pid2_d = group_personid("aidPERSONIDPAPERS", "aidPERSONIDDATA") compare_personid_tables(pid1_p, pid1_d, pid2_p, pid2_d, fp) diff --git a/modules/bibauthorid/lib/bibauthorid_prob_matrix.py b/modules/bibauthorid/lib/bibauthorid_prob_matrix.py index 526fafc60..a017c9d61 100644 --- a/modules/bibauthorid/lib/bibauthorid_prob_matrix.py +++ b/modules/bibauthorid/lib/bibauthorid_prob_matrix.py @@ -1,115 +1,137 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. import bibauthorid_config as bconfig from bibauthorid_comparison import compare_bibrefrecs from bibauthorid_comparison import clear_all_caches as clear_comparison_caches -from bibauthorid_backinterface import bib_matrix -from bibauthorid_backinterface import get_sql_time +from bibauthorid_backinterface import Bib_matrix from bibauthorid_backinterface import filter_modified_record_ids -from bibauthorid_general_utils import update_status \ - , update_status_final +from bibauthorid_general_utils import bibauthor_print \ + , update_status \ + , update_status_final \ + , is_eq if bconfig.DEBUG_CHECKS: - def _debug_is_eq(v1, v2): - eps = 1e-2 - return v1 + eps > v2 and v2 + eps > v1 - def _debug_is_eq_v(vl1, vl2): if isinstance(vl1, str) and isinstance(vl2, str): return vl1 == vl2 if isinstance(vl1, tuple) and isinstance(vl2, tuple): - return _debug_is_eq(vl1[0], vl2[0]) and _debug_is_eq(vl1[1], vl2[1]) + return is_eq(vl1[0], vl2[0]) and is_eq(vl1[1], vl2[1]) return False -class probability_matrix: + +class ProbabilityMatrix(object): ''' This class contains and maintains the comparison between all virtual authors. It is able to write and read from the database and update the results. ''' + def __init__(self): + self._bib_matrix = Bib_matrix() + + def load(self, lname, load_map=True, load_matrix=True): + update_status(0., "Loading probability matrix...") + self._bib_matrix.load(lname, load_map, load_matrix) + update_status_final("Probability matrix loaded.") + + def store(self, name): + update_status(0., "Saving probability matrix...") + self._bib_matrix.store(name) + update_status_final("Probability matrix saved.") + + def __getitem__(self, bibs): + return self._bib_matrix[bibs[0], bibs[1]] + + + def __get_up_to_date_bibs(self): + return frozenset(filter_modified_record_ids( + self._bib_matrix.get_keys(), + self._bib_matrix.creation_time)) - def __init__(self, cluster_set, use_cache=False, save_cache=False): + def is_up_to_date(self, cluster_set): + return self.__get_up_to_date_bibs() >= frozenset(cluster_set.all_bibs()) + + def recalculate(self, cluster_set): ''' Constructs probability matrix. If use_cache is true, it will try to load old computations from the database. If save cache is true it will save the current results into the database. @param cluster_set: A cluster set object, used to initialize the matrix. ''' def check_for_cleaning(cur_calc): if cur_calc % 10000000 == 0: clear_comparison_caches() - self._bib_matrix = bib_matrix(cluster_set) - - old_matrix = bib_matrix() + old_matrix = self._bib_matrix + cached_bibs = self.__get_up_to_date_bibs() + self._bib_matrix = Bib_matrix(cluster_set) - ncl = sum(len(cl.bibs) for cl in cluster_set.clusters) + ncl = cluster_set.num_all_bibs expected = ((ncl * (ncl - 1)) / 2) if expected == 0: expected = 1 - if use_cache and old_matrix.load(cluster_set.last_name): - cached_bibs = set(filter_modified_record_ids( - old_matrix.get_keys(), - old_matrix.creation_time)) - else: - cached_bibs = set() - - if save_cache: - creation_time = get_sql_time() - cur_calc, opti = 0, 0 for cl1 in cluster_set.clusters: update_status((float(opti) + cur_calc) / expected, "Prob matrix: calc %d, opti %d." 
% (cur_calc, opti)) for cl2 in cluster_set.clusters: if id(cl1) < id(cl2) and not cl1.hates(cl2): for bib1 in cl1.bibs: for bib2 in cl2.bibs: if bib1 in cached_bibs and bib2 in cached_bibs: val = old_matrix[bib1, bib2] if not val: cur_calc += 1 check_for_cleaning(cur_calc) val = compare_bibrefrecs(bib1, bib2) else: opti += 1 if bconfig.DEBUG_CHECKS: assert _debug_is_eq_v(val, compare_bibrefrecs(bib1, bib2)) else: cur_calc += 1 check_for_cleaning(cur_calc) val = compare_bibrefrecs(bib1, bib2) self._bib_matrix[bib1, bib2] = val clear_comparison_caches() + update_status_final("Matrix done. %d calc, %d opt." % (cur_calc, opti)) - if save_cache: - update_status(1., "saving...") - self._bib_matrix.store(cluster_set.last_name, creation_time) - update_status_final("Matrix done. %d calc, %d opt." % (cur_calc, opti)) +def prepare_matirx(cluster_set, force): + if bconfig.DEBUG_CHECKS: + assert cluster_set._debug_test_hate_relation() + assert cluster_set._debug_duplicated_recs() - def __getitem__(self, bibs): - return self._bib_matrix[bibs[0], bibs[1]] + matr = ProbabilityMatrix() + matr.load(cluster_set.last_name, load_map=True, load_matrix=False) + if not force and matr.is_up_to_date(cluster_set): + bibauthor_print("Cluster %s is up-to-date and therefore will not be computed." + % cluster_set.last_name) + # nothing to do + return False + matr.load(cluster_set.last_name, load_map=False, load_matrix=True) + matr.recalculate(cluster_set) + matr.store(cluster_set.last_name) + return True diff --git a/modules/bibauthorid/lib/bibauthorid_rabbit.py b/modules/bibauthorid/lib/bibauthorid_rabbit.py index 04f565855..faa0d924b 100644 --- a/modules/bibauthorid/lib/bibauthorid_rabbit.py +++ b/modules/bibauthorid/lib/bibauthorid_rabbit.py @@ -1,126 +1,186 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
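
# A minimal usage sketch of the ProbabilityMatrix flow above (cluster_set,
# bib1 and bib2 are assumed to come from the tortoise clustering code;
# prepare_matirx() itself loads only the key map, checks freshness with
# is_up_to_date(), and recomputes and stores the matrix only when forced
# or stale):
#
#     from bibauthorid_prob_matrix import ProbabilityMatrix, prepare_matirx
#
#     prepare_matirx(cluster_set, force=False)  # recomputes and stores only if stale
#     matr = ProbabilityMatrix()
#     matr.load(cluster_set.last_name)          # loads key map and matrix values
#     val = matr[bib1, bib2]                    # comparison value for a pair of bibs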
from itertools import cycle, imap, chain, izip
from operator import itemgetter

from bibtask import task_sleep_now_if_required
+import bibauthorid_config as bconfig
from bibauthorid_comparison import cached_sym
from bibauthorid_name_utils import compare_names as comp_names
from bibauthorid_name_utils import split_name_parts
from bibauthorid_name_utils import create_normalized_name
from bibauthorid_general_utils import update_status \
    , update_status_final
from bibauthorid_matrix_optimization import maximized_mapping
from bibauthorid_backinterface import get_all_valid_bibrecs
from bibauthorid_backinterface import filter_bibrecs_outside
from bibauthorid_backinterface import get_deleted_papers
from bibauthorid_backinterface import delete_paper_from_personid
from bibauthorid_backinterface import get_authors_from_paper
from bibauthorid_backinterface import get_coauthors_from_paper
from bibauthorid_backinterface import get_signatures_from_rec
from bibauthorid_backinterface import modify_signature
from bibauthorid_backinterface import remove_sigs
-from bibauthorid_backinterface import find_pids_by_exact_name
-from bibauthorid_backinterface import new_person_from_signature
-from bibauthorid_backinterface import add_signature
+from bibauthorid_backinterface import find_pids_by_exact_name as _find_pids_by_exact_name
+from bibauthorid_backinterface import new_person_from_signature as _new_person_from_signature
+from bibauthorid_backinterface import add_signature as _add_signature
from bibauthorid_backinterface import update_personID_canonical_names
+from bibauthorid_backinterface import update_personID_external_ids
from bibauthorid_backinterface import get_name_by_bibrecref
+from bibauthorid_backinterface import populate_partial_marc_caches
+from bibauthorid_backinterface import destroy_partial_marc_caches
+from bibauthorid_backinterface import get_inspire_id
+from bibauthorid_backinterface import get_person_with_extid
+from bibauthorid_backinterface import get_name_string_to_pid_dictionary
+from bibauthorid_backinterface import get_new_personid
+
+USE_EXT_IDS = bconfig.RABBIT_USE_EXTERNAL_IDS
+USE_INSPIREID = bconfig.RABBIT_USE_EXTERNAL_ID_INSPIREID
+
+if bconfig.RABBIT_USE_CACHED_PID_FOR_EXACT_NAME_SEARCH:
+    PID_CACHE = get_name_string_to_pid_dictionary()
+
+    def find_pids_by_exact_names_cache(name):
+        try:
+            return zip(PID_CACHE[name])
+        except KeyError:
+            return []
+
+    def add_signature_using_names_cache(sig, name, pid):
+        try:
+            PID_CACHE[name].add(pid)
+        except KeyError:
+            PID_CACHE[name] = set([pid])
+        _add_signature(sig, name, pid)
+
+    def new_person_from_signature_using_names_cache(sig, name):
+        pid = get_new_personid()
+        add_signature_using_names_cache(sig, name, pid)
+        return pid  # rabbit() below relies on receiving the new person id
+

def rabbit(bibrecs, check_invalid_papers=False):
    '''
    @param bibrecs: an iterable full of bibrecs
    @type bibrecs: an iterable of ints
    @return: none
    '''
+    if bconfig.RABBIT_USE_CACHED_PID_FOR_EXACT_NAME_SEARCH:
+        add_signature = add_signature_using_names_cache
+        new_person_from_signature = new_person_from_signature_using_names_cache
+        find_pids_by_exact_name = find_pids_by_exact_names_cache
+    else:
+        add_signature = _add_signature
+        new_person_from_signature = _new_person_from_signature
+        find_pids_by_exact_name = _find_pids_by_exact_name

    compare_names = cached_sym(lambda x: x)(comp_names)
    # fast assign threshold
    threshold = 0.80

    if not bibrecs or check_invalid_papers:
        all_bibrecs = get_all_valid_bibrecs()

        if not bibrecs:
            bibrecs = all_bibrecs

        if check_invalid_papers:
            filter_bibrecs_outside(all_bibrecs)

+    if len(bibrecs) > bconfig.RABBIT_USE_CACHED_GET_GROUPED_RECORDS_THRESHOLD:
+        populate_partial_marc_caches()
+        SWAPPED_GET_GROUPED_RECORDS = True
+    else:
+        SWAPPED_GET_GROUPED_RECORDS = False
+
    updated_pids = set()
    deleted = frozenset(p[0] for p in get_deleted_papers())

    for idx, rec in enumerate(bibrecs):
        task_sleep_now_if_required(True)
        update_status(float(idx) / len(bibrecs), "%d/%d current: %d" % (idx, len(bibrecs), rec))

        if rec in deleted:
            delete_paper_from_personid(rec)
            continue

        markrefs = frozenset(chain(izip(cycle([100]), imap(itemgetter(0), get_authors_from_paper(rec))),
                                   izip(cycle([700]), imap(itemgetter(0), get_coauthors_from_paper(rec)))))

        personid_rows = [map(int, row[:3]) + [row[4]] for row in get_signatures_from_rec(rec)]
        personidrefs_names = dict(((row[1], row[2]), row[3]) for row in personid_rows)

        personidrefs = frozenset(personidrefs_names.keys())
        new_signatures = list(markrefs - personidrefs)
        old_signatures = list(personidrefs - markrefs)

        new_signatures_names = dict((new, create_normalized_name(split_name_parts(get_name_by_bibrecref(new))))
                                    for new in new_signatures)

        # matrix |new_signatures| X |old_signatures|
        matrix = [[compare_names(new_signatures_names[new], personidrefs_names[old])
                   for old in old_signatures] for new in new_signatures]

        # [(new_signatures, old_signatures)]
        best_match = [(new_signatures[new], old_signatures[old])
                      for new, old, score in maximized_mapping(matrix) if score > threshold]
        for new, old in best_match:
            modify_signature(old, rec, new, new_signatures_names[new])

        remove_sigs(tuple(list(old) + [rec]) for old in old_signatures)

        not_matched = frozenset(new_signatures) - frozenset(map(itemgetter(0), best_match))

        if not_matched:
            used_pids = set(r[0] for r in personid_rows)

        for sig in not_matched:
            name = new_signatures_names[sig]
+            matched_pids = []
+            if USE_EXT_IDS:
+                if USE_INSPIREID:
+                    inspire_id = get_inspire_id(sig + (rec,))
+                    if inspire_id:
+                        matched_pids = list(get_person_with_extid(inspire_id[0]))
+                    if matched_pids:
+                        add_signature(list(sig) + [rec], name, matched_pids[0][0])
+                        updated_pids.add(matched_pids[0][0])
+                        continue
+
            matched_pids = find_pids_by_exact_name(name)
            matched_pids = [p for p in matched_pids if int(p[0]) not in used_pids]

            if not matched_pids:
                new_pid = new_person_from_signature(list(sig) + [rec], name)
                used_pids.add(new_pid)
                updated_pids.add(new_pid)
            else:
                add_signature(list(sig) + [rec], name, matched_pids[0][0])
                used_pids.add(matched_pids[0][0])
                updated_pids.add(matched_pids[0][0])

    update_status_final()

    if updated_pids: # an empty set will update all canonical_names
        update_personID_canonical_names(updated_pids)
+        update_personID_external_ids(updated_pids, limit_to_claimed_papers=bconfig.LIMIT_EXTERNAL_IDS_COLLECTION_TO_CLAIMED_PAPERS)
+
+    if SWAPPED_GET_GROUPED_RECORDS:
+        destroy_partial_marc_caches()

diff --git a/modules/bibauthorid/lib/bibauthorid_recipes.py b/modules/bibauthorid/lib/bibauthorid_recipes.py
index 5a07406c0..e92011652 100644
--- a/modules/bibauthorid/lib/bibauthorid_recipes.py
+++ b/modules/bibauthorid/lib/bibauthorid_recipes.py
@@ -1,76 +1,96 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' BibAuthorID recipes This file has examples how to use the backend of BibAuthorID. ''' def initial_disambiguation(): from bibauthorid_tortoise import tortoise_from_scratch from bibauthorid_personid_maintenance import check_results tortoise_from_scratch() assert check_results() # This is a super safe call to tortoise. # For the moment tortoise is in experimental phase so # it is mandatory. def safe_disambiguation_iteration(): from bibauthorid_tortoise import tortoise from bibauthorid_rabbit import rabbit from bibauthorid_personid_maintenance import check_personid_papers \ , check_results \ , repair_personid if not check_personid_papers(): rabbit([]) repair_personid() rabbit([]) assert check_personid_papers() tortoise() assert check_results() def safe_merger(): - from bibauthorid_merge import merge + from bibauthorid_merge import merge_static + from bibauthorid_merge import merge_static_oldstyle from bibauthorid_rabbit import rabbit from bibauthorid_personid_maintenance import check_personid_papers \ , check_results \ , check_merger \ , repair_personid \ , copy_personids \ , compare_personids assert check_results() if not check_personid_papers(): rabbit([]) repair_personid() rabbit([]) assert check_personid_papers() copy_personids() - merge() + merge_static_oldstyle() assert check_personid_papers() assert check_merger() compare_personids("/tmp/merge_diff") +def test_accuracy(): + from bibauthorid_tortoise import tortoise + from bibauthorid_rabbit import rabbit + from bibauthorid_personid_maintenance import check_personid_papers \ + , check_results \ + , repair_personid + from bibauthorid_merge import matched_claims + + if not check_personid_papers(): + rabbit([]) + repair_personid() + rabbit([]) + + assert check_personid_papers() + tortoise(pure = True) + assert check_results() + + return matched_claims() + diff --git a/modules/bibauthorid/lib/bibauthorid_scheduler.py b/modules/bibauthorid/lib/bibauthorid_scheduler.py index 0445c5724..941b1f9f7 100644 --- a/modules/bibauthorid/lib/bibauthorid_scheduler.py +++ b/modules/bibauthorid/lib/bibauthorid_scheduler.py @@ -1,145 +1,168 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
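
# A minimal driver sketch for the recipes above; the call order is an
# assumption drawn from their docstrings and assertions, not a mandated
# workflow:
#
#     from bibauthorid_recipes import initial_disambiguation
#     from bibauthorid_recipes import safe_disambiguation_iteration, safe_merger
#
#     initial_disambiguation()         # one-off bootstrap: tortoise from scratch
#     safe_disambiguation_iteration()  # periodic re-run, guarded by rabbit/repair checks
#     safe_merger()                    # merge results; diff report in /tmp/merge_diff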
import re import os -import operator -from itertools import izip, starmap -from invenio.bibauthorid_general_utils import print_tortoise_memory_log -from invenio.bibauthorid_general_utils import clear_tortoise_memory_log -import invenio.bibauthorid_config as bconfig - -coefs = [1. / 17., -1., 0.] +import sys +from bibauthorid_general_utils import print_tortoise_memory_log +from bibauthorid_least_squares import to_function as create_approx_func +import bibauthorid_config as bconfig +from bibauthorid_general_utils import is_eq, update_status, update_status_final def to_number(stry): return int(re.sub("\D", "", stry)) def dict_by_file(fpath): fp = open(fpath) content = fp.read() fp.close() return dict(x.split(':') for x in content.split("\n")[:-1]) def get_free_memory(): mem = dict_by_file("/proc/meminfo") return sum(map(to_number, (mem['MemFree'], mem['Buffers'], mem['Cached']))) def get_total_memory(): mem = dict_by_file("/proc/meminfo") return to_number(mem['MemTotal']) def get_peak_mem(): pid = os.getpid() mem = dict_by_file("/proc/%d/status" % pid) return map(to_number, (mem["VmPeak"], mem["VmHWM"])) -def estimate_ram_usage(bibs): - return sum(starmap(operator.mul, izip(coefs, (bibs * bibs, bibs, 1)))) + +class Estimator(object): + def __init__(self, coefs): + self.estimate = create_approx_func(coefs) + + +matrix_coefs = [1133088., 4., 0.016] +wedge_coefs = [800000., 230., 0.018] + def get_biggest_below(lim, arr): for idx, elem in enumerate(arr): if elem > lim: return idx - 1 return len(arr) - 1 -def initialize_ram_estimation(): - global coefs - coefs[2] = get_peak_mem()[0] * 0.9 def get_cores_count(): import multiprocessing return multiprocessing.cpu_count() -def schedule(job, args, sizs): - assert len(args) == len(sizs) + +def schedule(jobs, sizs, estimator, memfile_path=None): + if bconfig.DEBUG_PROCESS_PEAK_MEMORY and memfile_path: + def register_memory_usage(): + pid = os.getpid() + peak = get_peak_mem() + fp = open(memfile_path, 'a') + print_tortoise_memory_log( + {'pid' : pid, + 'peak1': peak[0], + 'peak2': peak[1], + 'est' : sizs[idx], + 'bibs' : bibs[idx] + }, + fp + ) + fp.close() + else: + def register_memory_usage(): + pass + + def run_job(idx): + try: + sys.stdout = output_killer + jobs[idx]() + register_memory_usage() + os._exit(os.EX_OK) + except Exception, e: + f = open('/tmp/exception-%s' % str(os.getpid()), "w") + f.write(str(e) + '\n') + f.close() + os._exit(os.EX_SOFTWARE) + + output_killer = open(os.devnull, 'w') + assert len(jobs) == len(sizs) + ret_status = [None] * len(jobs) max_workers = get_cores_count() - pid_2_size = {} + pid_2_idx_size = {} #free = get_free_memory() - free = get_total_memory() + initial = get_total_memory() + free = initial bibs = sizs - initialize_ram_estimation() - sizs = map(estimate_ram_usage, sizs) + sizs = map(estimator.estimate, sizs) - if bconfig.DEBUG_PROCESS_PEAK_MEMORY: - clear_tortoise_memory_log() + done = 0. 
+ total = sum(sizs) + jobs_n = len(jobs) - too_big = sorted((idx for idx in xrange(len(sizs)) if sizs[idx] > free), reverse=True) + update_status(0., "%d / %d" % (0, jobs_n)) + too_big = sorted((idx for idx, size in enumerate(sizs) if size > free), reverse=True) for idx in too_big: pid = os.fork() if pid == 0: # child - job(*args[idx]) - if bconfig.DEBUG_PROCESS_PEAK_MEMORY: - pid = os.getpid() - print_tortoise_memory_log( - {'pid' : pid, - 'peak' : get_peak_mem(), - 'est' : sizs[idx], - 'bibs' : bibs[idx]}) - - os._exit(0) + run_job(idx) else: # parent - del args[idx] - del sizs[idx] - del bibs[idx] + done += sizs[idx] cpid, status = os.wait() + update_status(done / total, "%d / %d" % (jobs_n - len(jobs), jobs_n)) + ret_status[idx] = status assert cpid == pid + del jobs[idx] + del sizs[idx] + del bibs[idx] - while args or pid_2_size: - while len(pid_2_size) < max_workers: + while jobs or pid_2_idx_size: + while len(pid_2_idx_size) < max_workers: idx = get_biggest_below(free, sizs) - if idx != -1: pid = os.fork() if pid == 0: # child - job(*args[idx]) - if bconfig.DEBUG_PROCESS_PEAK_MEMORY: - pid = os.getpid() - print_tortoise_memory_log( - {'pid' : pid, - 'peak' : get_peak_mem(), - 'est' : sizs[idx], - 'bibs' : bibs[idx]}) - - os._exit(0) + run_job(idx) else: # parent - pid_2_size[pid] = (sizs[idx], args[idx]) + pid_2_idx_size[pid] = (idx, sizs[idx]) assert free > sizs[idx] free -= sizs[idx] - del args[idx] + del jobs[idx] del sizs[idx] del bibs[idx] else: break pid, status = os.wait() - assert pid in pid_2_size - freed, name = pid_2_size[pid] - if status != 0: - import sys - print >> sys.stderr, "Worker %s died." % str(name) - sys.stderr.flush() - assert False - + assert pid in pid_2_idx_size + idx, freed = pid_2_idx_size[pid] + done += freed + update_status(done / total, "%d / %d" % (jobs_n - len(jobs) - len(pid_2_idx_size), jobs_n)) + ret_status[idx] = status free += freed - del pid_2_size[pid] + del pid_2_idx_size[pid] - assert not pid_2_size + update_status_final("%d / %d" % (jobs_n, jobs_n)) + assert is_eq(free, initial) + assert not pid_2_idx_size + assert all(stat != None for stat in ret_status) + return ret_status diff --git a/modules/bibauthorid/lib/bibauthorid_templates.py b/modules/bibauthorid/lib/bibauthorid_templates.py index 6140dd365..db0c931d8 100644 --- a/modules/bibauthorid/lib/bibauthorid_templates.py +++ b/modules/bibauthorid/lib/bibauthorid_templates.py @@ -1,1706 +1,1726 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
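
# A hedged sketch of how the reworked schedule() above can be driven; the job
# callables and sizes are invented for illustration, while Estimator,
# matrix_coefs and the returned exit statuses come from the code above:
#
#     from bibauthorid_scheduler import schedule, Estimator, matrix_coefs
#
#     jobs = [lambda: tortoise_on('ellis'), lambda: tortoise_on('smith')]  # hypothetical callables
#     bibs = [1200, 450]                    # per-job problem sizes (number of bibs)
#     est = Estimator(matrix_coefs)         # least-squares RAM-usage model
#     statuses = schedule(jobs, bibs, est)  # forks workers within free RAM;
#                                           # returns one exit status per job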
"""Bibauthorid HTML templates""" # pylint: disable=W0105 # pylint: disable=C0301 #from cgi import escape #from urllib import quote # import invenio.bibauthorid_config as bconfig from invenio.config import CFG_SITE_LANG from invenio.config import CFG_SITE_URL from invenio.config import CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL from invenio.bibformat import format_record from invenio.session import get_session from invenio.search_engine_utils import get_fieldvalues from invenio.bibauthorid_webapi import get_person_redirect_link, get_canonical_id_from_person_id +from invenio.bibauthorid_webapi import get_personiID_external_ids +from invenio.bibauthorid_frontinterface import get_bibrefrec_name_string, get_uid_from_personid from invenio.bibauthorid_frontinterface import get_bibrefrec_name_string from invenio.bibauthorid_frontinterface import get_canonical_id_from_personid from invenio.messages import gettext_set_language, wash_language +from invenio.webuser import get_email #from invenio.textutils import encode_for_xml class Template: """Templating functions used by aid""" def __init__(self, language=CFG_SITE_LANG): """Set defaults for all aid template output""" self.language = language self._ = gettext_set_language(wash_language(language)) def tmpl_person_detail_layout(self, content): ''' writes HTML content into the person css container @param content: HTML content @type content: string @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h(content) h('
') return "\n".join(html) def tmpl_notification_box(self, message, teaser="Notice:", show_close_btn=True): ''' Creates a notification box based on the jQuery UI style @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h(' ') h('
') return "\n".join(html) def tmpl_error_box(self, message, teaser="Alert:", show_close_btn=True): ''' Creates an error box based on the jQuery UI style @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s' % (teaser, message)) if show_close_btn: h(' X

') h('
') h('
') return "\n".join(html) def tmpl_ticket_box(self, teaser, message, show_close_btn=True): ''' Creates a semi-permanent box informing about ticket status notifications @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param ticket: The ticket object from the session @param ticket: list of dict @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s ' % (teaser, message)) h('' + self._('Click here to review the transactions.') + '') h('
') if show_close_btn: h(' X

') h('
') h('
') return "\n".join(html) def tmpl_search_ticket_box(self, teaser, message, search_ticket, show_close_btn=False): ''' Creates a box informing about a claim in progress for the search. @param message: message to display in the box @type message: string @param teaser: Teaser text in bold next to icon @type teaser: string @param search_ticket: The search ticket object from the session @param search_ticket: list of dict @param show_close_btn: display close button [x] @type show_close_btn: boolean @return: HTML code @rtype: string ''' html = [] h = html.append h('
') h('
') h('

') h(' %s %s ' % (teaser, message)) h("

    ") for paper in search_ticket['bibrefs']: if ',' in paper: pbibrec = paper.split(',')[1] else: pbibrec = paper h("
  • %s
  • " % (format_record(pbibrec, "ha"))) h("
") h('' + self._('Quit searching.') + '') # h('DBGticket - ' + str(search_ticket)) if show_close_btn: h(' X

') h('
') h('
') h('

 

') return "\n".join(html) def tmpl_meta_includes(self, kill_browser_cache=False): ''' Generates HTML code for the header section of the document META tags to kill browser caching Javascript includes CSS definitions @param kill_browser_cache: Do we want to kill the browser cache? @type kill_browser_cache: boolean ''' js_path = "%s/js" % CFG_SITE_URL imgcss_path = "%s/img" % CFG_SITE_URL result = [] # Add browser cache killer, hence some notifications are not displayed # out of the session. if kill_browser_cache: result = [ '', '', '', '', ''] scripts = ["jquery-ui.min.js", "jquery.form.js", "jquery.dataTables.min.js", "bibauthorid.js"] result.append('' % (imgcss_path)) result.append('' % (imgcss_path)) result.append('' % (imgcss_path)) for script in scripts: result.append('' % (js_path, script)) return "\n".join(result) def tmpl_author_confirmed(self, bibref, pid, verbiage_dict={'alt_confirm':'Confirmed.', 'confirm_text':'This record assignment has been confirmed.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Repeal!', 'repeal_text':'Repeal record assignment', 'to_other_text':'Assign to another person', 'alt_to_other':'To other person!' }, show_reset_button=True): ''' Generate play per-paper links for the table for the status "confirmed" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param verbiage_dict: language for the link descriptions @type verbiage_dict: dict ''' stri = (' ' '%(alt_confirm)s' '%(confirm_text)s
') if show_reset_button: stri = stri + ( '' '%(alt_forget)s' '%(forget_text)s
') stri = stri + ( '' '%(alt_repeal)s' '%(repeal_text)s
' '' '%(alt_to_other)s' '%(to_other_text)s
') return (stri % ({'url': CFG_SITE_URL, 'ref': bibref, 'pid': pid, 'alt_confirm':verbiage_dict['alt_confirm'], 'confirm_text':verbiage_dict['confirm_text'], 'alt_forget':verbiage_dict['alt_forget'], 'forget_text':verbiage_dict['forget_text'], 'alt_repeal':verbiage_dict['alt_repeal'], 'repeal_text':verbiage_dict['repeal_text'], 'to_other_text':verbiage_dict['to_other_text'], 'alt_to_other':verbiage_dict['alt_to_other']})) def tmpl_author_repealed(self, bibref, pid, verbiage_dict={'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Rejected!', 'repeal_text':'Repeal this record assignment.', 'to_other_text':'Assign to another person', 'alt_to_other':'To other person!' }, show_reset_button=True): ''' Generate play per-paper links for the table for the status "repealed" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param verbiage_dict: language for the link descriptions @type verbiage_dict: dict ''' stri = (' ' '%(alt_repeal)s' '%(repeal_text)s
') if show_reset_button: stri = stri + ( '' '%(alt_forget)s' '%(forget_text)s
') stri = stri + ( '' '%(alt_confirm)s' '%(confirm_text)s
' '' '%(alt_to_other)s' '%(to_other_text)s
') return (stri % ({'url': CFG_SITE_URL, 'ref': bibref, 'pid': pid, 'alt_confirm':verbiage_dict['alt_confirm'], 'confirm_text':verbiage_dict['confirm_text'], 'alt_forget':verbiage_dict['alt_forget'], 'forget_text':verbiage_dict['forget_text'], 'alt_repeal':verbiage_dict['alt_repeal'], 'repeal_text':verbiage_dict['repeal_text'], 'to_other_text':verbiage_dict['to_other_text'], 'alt_to_other':verbiage_dict['alt_to_other']})) def tmpl_author_undecided(self, bibref, pid, verbiage_dict={'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_repeal':'Rejected!', 'repeal_text':'This record has been repealed.', 'to_other_text':'Assign to another person', 'alt_to_other':'To other person!' }, show_reset_button=True): ''' Generate play per-paper links for the table for the status "no decision taken yet" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param verbiage_dict: language for the link descriptions @type verbiage_dict: dict ''' #batchprocess?mconfirm=True&bibrefs=['100:17,16']&pid=1 str = (' ' '' '%(alt_confirm)s' '%(confirm_text)s
' '' '%(alt_repeal)s' '%(repeal_text)s
' '' '%(alt_to_other)s' '%(to_other_text)s
') return (str % ({'url': CFG_SITE_URL, 'ref': bibref, 'pid': pid, 'alt_confirm':verbiage_dict['alt_confirm'], 'confirm_text':verbiage_dict['confirm_text'], 'alt_repeal':verbiage_dict['alt_repeal'], 'repeal_text':verbiage_dict['repeal_text'], 'to_other_text':verbiage_dict['to_other_text'], 'alt_to_other':verbiage_dict['alt_to_other']})) def tmpl_open_claim(self, bibrefs, pid, last_viewed_pid, search_enabled=True): ''' Generate entry page for "claim or attribute this paper" @param bibref: construct of unique ID for this author on this paper @type bibref: string @param pid: the Person ID @type pid: int @param last_viewed_pid: last ID that had been subject to an action @type last_viewed_pid: int ''' t_html = [] h = t_html.append h(self._('You are about to attribute the following paper')) if len(bibrefs) > 1: h('s:
') else: h(':
') h("") pp_html = [] h = pp_html.append h(self.tmpl_notification_box("\n".join(t_html), self._("Info"), False)) h('

Your options:

') if pid > -1: h((' Claim for yourself
') % (CFG_SITE_URL, bibs, str(pid))) if last_viewed_pid: h((' Attribute to %s
') % (CFG_SITE_URL, bibs, str(last_viewed_pid[0]), last_viewed_pid[1])) if search_enabled: h(('' + self._(' Search for a person to attribute the paper to') + '
') % (CFG_SITE_URL, bibs)) return "\n".join(pp_html) def __tmpl_admin_records_table(self, form_id, person_id, bibrecids, verbiage_dict={'no_doc_string':'Sorry, there are currently no documents to be found in this category.', 'b_confirm':'Confirm', 'b_repeal':'Repeal', 'b_to_others':'Assign to other person', 'b_forget':'Forget decision'}, buttons_verbiage_dict={'mass_buttons':{'no_doc_string':'Sorry, there are currently no documents to be found in this category.', 'b_confirm':'Confirm', 'b_repeal':'Repeal', 'b_to_others':'Assign to other person', 'b_forget':'Forget decision'}, 'record_undecided':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_repeal':'Rejected!', 'repeal_text':'This record has been repealed.'}, 'record_confirmed':{'alt_confirm':'Confirmed.', 'confirm_text':'This record assignment has been confirmed.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Repeal!', 'repeal_text':'Repeal record assignment'}, 'record_repealed':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Rejected!', 'repeal_text':'Repeal this record assignment.'}}, show_reset_button=True): ''' Generate the big tables for the person overview page @param form_id: name of the form @type form_id: string @param person_id: Person ID @type person_id: int @param bibrecids: List of records to display @type bibrecids: list @param verbiage_dict: language for the elements @type verbiage_dict: dict @param buttons_verbiage_dict: language for the buttons @type buttons_verbiage_dict: dict ''' no_papers_html = ['
'] no_papers_html.append('%s' % verbiage_dict['no_doc_string']) no_papers_html.append('
') if not bibrecids or not person_id: return "\n".join(no_papers_html) pp_html = [] h = pp_html.append h('
' % (form_id)) h('') h('
') h(('∟') % (CFG_SITE_URL)) h('' % (person_id)) h('' % verbiage_dict['b_confirm']) h('' % verbiage_dict['b_repeal']) h('' % verbiage_dict['b_to_others']) if show_reset_button: h('' % verbiage_dict['b_forget']) h("
") h('') h("") h(" ") h(' ') h(' ') h(" ") h(" ") h(" ") h(" ") h(" ") h(" ") h("") h("") for idx, paper in enumerate(bibrecids): h(' ') h(' ' % (paper['bibref'])) rec_info = format_record(paper['recid'], "ha") rec_info = str(idx + 1) + '. ' + rec_info h(" " % (rec_info)) h(" " % (paper['authorname'])) aff = "" if paper['authoraffiliation']: aff = paper['authoraffiliation'] else: aff = "Not assigned" h(" " % (aff)) if paper['paperdate']: pdate = paper['paperdate'] else: pdate = 'N.A.' h(" " % pdate) if paper['paperexperiment']: pdate = paper['paperexperiment'] else: pdate = 'N.A.' h(" " % pdate) paper_status = self._("No status information found.") if paper['flag'] == 2: paper_status = self.tmpl_author_confirmed(paper['bibref'], person_id, verbiage_dict=buttons_verbiage_dict['record_confirmed'], show_reset_button=show_reset_button) elif paper['flag'] == -2: paper_status = self.tmpl_author_repealed(paper['bibref'], person_id, verbiage_dict=buttons_verbiage_dict['record_repealed'], show_reset_button=show_reset_button) else: paper_status = self.tmpl_author_undecided(paper['bibref'], person_id, verbiage_dict=buttons_verbiage_dict['record_undecided'], show_reset_button=show_reset_button) h(' ') h(" ") h(" ") h("
 Paper Short InfoAuthor NameAffiliationDateExperimentActions
%s%s%s%s%s
%s  
' % (paper['bibref'], paper['flag'], paper_status)) if 'rt_status' in paper and paper['rt_status']: h('' % (CFG_SITE_URL, self._("Operator review of user actions pending"))) h('
") h('') h('
') h(('∟') % (CFG_SITE_URL)) h('' % (person_id)) h('' % verbiage_dict['b_confirm']) h('' % verbiage_dict['b_repeal']) h('' % verbiage_dict['b_to_others']) if show_reset_button: h('' % verbiage_dict['b_forget']) h("
") h("
") return "\n".join(pp_html) def __tmpl_reviews_table(self, person_id, bibrecids, admin=False): ''' Generate the table for potential reviews. @param form_id: name of the form @type form_id: string @param person_id: Person ID @type person_id: int @param bibrecids: List of records to display @type bibrecids: list @param admin: Show admin functions @type admin: boolean ''' no_papers_html = ['
'] no_papers_html.append(self._('Sorry, there are currently no records to be found in this category.')) no_papers_html.append('
') if not bibrecids or not person_id: return "\n".join(no_papers_html) pp_html = [] h = pp_html.append h('
') h('') h(" ") h(" ") h(' ') h(' ') h(" ") h(" ") h(" ") h(" ") for paper in bibrecids: h(' ') h(' ' % (paper)) rec_info = format_record(paper[0], "ha") if not admin: rec_info = rec_info.replace("person/search?q=", "author/") h(" " % (rec_info)) h(' ' % (paper)) h(" ") h(" ") h("
 Paper Short InfoActions
%s' + self._('Review Transaction') + '
") h('
' + self._(' On all pages: ')) h('' + self._('Select All') + ' | ') h('' + self._('Select None') + ' | ') h('' + self._('Invert Selection') + '') h('
') h('
') h('∟ With selected do: ') h('' % (person_id)) h('') h('') h("
") h('
') return "\n".join(pp_html) def tmpl_admin_person_info_box(self, ln, person_id= -1, names=[]): ''' Generate the box showing names @param ln: the language to use @type ln: string @param person_id: Person ID @type person_id: int @param names: List of names to display @type names: list ''' html = [] h = html.append if not ln: pass #class="ui-tabs ui-widget ui-widget-content ui-corner-all"> h('
' + self._('Name variants:') + '

') h("

") h('' % (person_id, person_id)) for name in names: # h(("%s "+self._('as appeared on')+" %s"+self._(' records')+"
") # % (name[0], name[1])) h(("%s (%s); ") % (name[0], name[1])) h("

") h("
") return "\n".join(html) def tmpl_admin_tabs(self, ln=CFG_SITE_LANG, person_id= -1, rejected_papers=[], rest_of_papers=[], review_needed=[], rt_tickets=[], open_rt_tickets=[], show_tabs=['records', 'repealed', 'review', 'comments', 'tickets', 'data'], show_reset_button=True, ticket_links=['delete', 'commit', 'del_entry', 'commit_entry'], verbiage_dict={'confirmed':'Records', 'repealed':'Not this person\'s records', 'review':'Records in need of review', 'tickets':'Open Tickets', 'data':'Data', 'confirmed_ns':'Papers of this Person', 'repealed_ns':'Papers _not_ of this Person', 'review_ns':'Papers in need of review', 'tickets_ns':'Tickets for this Person', 'data_ns':'Additional Data for this Person'}, buttons_verbiage_dict={'mass_buttons':{'no_doc_string':'Sorry, there are currently no documents to be found in this category.', 'b_confirm':'Confirm', 'b_repeal':'Repeal', 'b_to_others':'Assign to other person', 'b_forget':'Forget decision'}, 'record_undecided':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_repeal':'Rejected!', 'repeal_text':'This record has been repealed.'}, 'record_confirmed':{'alt_confirm':'Confirmed.', 'confirm_text':'This record assignment has been confirmed.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Repeal!', 'repeal_text':'Repeal record assignment'}, 'record_repealed':{'alt_confirm':'Confirm!', 'confirm_text':'Confirm record assignment.', 'alt_forget':'Forget decision!', 'forget_text':'Forget assignment decision', 'alt_repeal':'Rejected!', 'repeal_text':'Repeal this record assignment.'}}): ''' Generate the tabs for the person overview page @param ln: the language to use @type ln: string @param person_id: Person ID @type person_id: int @param rejected_papers: list of repealed papers @type rejected_papers: list @param rest_of_papers: list of attributed of undecided papers @type rest_of_papers: list @param review_needed: list of papers that need a review (choose name) @type review_needed:list @param rt_tickets: list of tickets for this Person @type rt_tickets: list @param open_rt_tickets: list of open request tickets @type open_rt_tickets: list @param show_tabs: list of tabs to display @type show_tabs: list of strings @param ticket_links: list of links to display @type ticket_links: list of strings @param verbiage_dict: language for the elements @type verbiage_dict: dict @param buttons_verbiage_dict: language for the buttons @type buttons_verbiage_dict: dict ''' html = [] h = html.append h('
') h(' ') if 'records' in show_tabs: h('
') r = verbiage_dict['confirmed_ns'] h('' % r) h(self.__tmpl_admin_records_table("massfunctions", person_id, rest_of_papers, verbiage_dict=buttons_verbiage_dict['mass_buttons'], buttons_verbiage_dict=buttons_verbiage_dict, show_reset_button=show_reset_button)) h("
") if 'repealed' in show_tabs: h('
') r = verbiage_dict['repealed_ns'] h('' % r) h(self._('These records have been marked as not being from this person.')) h('
' + self._('They will be considered in the next run of the author ') + self._('disambiguation algorithm and might disappear from this listing.')) h(self.__tmpl_admin_records_table("rmassfunctions", person_id, rejected_papers, verbiage_dict=buttons_verbiage_dict['mass_buttons'], buttons_verbiage_dict=buttons_verbiage_dict, show_reset_button=show_reset_button)) h("
") if 'review' in show_tabs: h('
') r = verbiage_dict['review_ns'] h('' % r) h(self.__tmpl_reviews_table(person_id, review_needed, True)) h('
') if 'tickets' in show_tabs: h('
') r = verbiage_dict['tickets'] h('' % r) r = verbiage_dict['tickets_ns'] h('

%s:

' % r) if rt_tickets: pass # open_rt_tickets = [a for a in open_rt_tickets if a[1] == rt_tickets] for t in open_rt_tickets: name = self._('Not provided') surname = self._('Not provided') uidip = self._('Not available') comments = self._('No comments') email = self._('Not provided') date = self._('Not Available') actions = [] for info in t[0]: if info[0] == 'firstname': name = info[1] elif info[0] == 'lastname': surname = info[1] elif info[0] == 'uid-ip': uidip = info[1] elif info[0] == 'comments': comments = info[1] elif info[0] == 'email': email = info[1] elif info[0] == 'date': date = info[1] elif info[0] in ['confirm', 'repeal']: actions.append(info) if 'delete' in ticket_links: h(('Ticket number: %(tnum)s ' + self._(' Delete this ticket') + ' ') % ({'tnum':t[1], 'url':CFG_SITE_URL, 'pid':str(person_id)})) if 'commit' in ticket_links: h((' or ' + self._(' Commit this entire ticket') + '
') % ({'tnum':t[1], 'url':CFG_SITE_URL, 'pid':str(person_id)})) h('
') h('Opened by: %s, %s
' % (surname, name)) h('Date: %s
' % date) h('Identified by: %s
' % uidip) h('Email: %s
' % email) h('Comments: %s
' % comments) h('Suggested actions:
') h('
') for a in actions: bibref, bibrec = a[1].split(',') pname = get_bibrefrec_name_string(bibref) title = "" try: title = get_fieldvalues(int(bibrec), "245__a")[0] except IndexError: title = "No title available" if 'commit_entry' in ticket_links: h('%(action)s - %(name)s on %(title)s ' % ({'action': a[0], 'url': CFG_SITE_URL, 'pid': str(person_id), 'bib':a[1], 'name': pname, 'title': title, 'rt': t[1]})) else: h('%(action)s - %(name)s on %(title)s' % ({'action': a[0], 'name': pname, 'title': title})) if 'del_entry' in ticket_links: h(' - Delete this entry ' % ({'action': a[0], 'url': CFG_SITE_URL, 'pid': str(person_id), 'bib': a[1], 'rt': t[1]})) h(' - View record
' % ({'url':CFG_SITE_URL, 'record':str(bibrec)})) h('
') h('
') # h(str(open_rt_tickets)) h("
") if 'data' in show_tabs: h('
') r = verbiage_dict['data_ns'] h('' % r) canonical_name = get_canonical_id_from_person_id(person_id) if '.' in str(canonical_name) and not isinstance(canonical_name, int): canonical_name = canonical_name[0:canonical_name.rindex('.')] h('
Canonical name setup ') h('
Current canonical name: %s
' % (canonical_name, CFG_SITE_URL)) h('') h(' ' % canonical_name) h('' % person_id) h('') h('
NOTE: a number is appended automatically to the name displayed above. This cannot be changed manually, in order to ensure the uniqueness of IDs.') h('To change the number when it is greater than one, please change all the other names first; updating this one will then do the trick.
') h('
') - h('

' + self._('... This tab is currently under construction ... ') + '

') + userid = get_uid_from_personid(person_id) + h('

') + h(' Internal IDs
') + if userid: + email = get_email(int(userid)) + h('UserID: INSPIRE user %s is associated with this profile with email: %s' % (str(userid), str(email))) + else: + h('UserID: There is no INSPIRE user associated to this profile!') + h('

') + + external_ids = get_personiID_external_ids(person_id) + h('
') + h(' External IDs
') + for id in external_ids: + for k in external_ids[id]: + h(' %s : %s
' % (id, k)) + h('
') + h("
") h("
") return "\n".join(html) def tmpl_bibref_check(self, bibrefs_auto_assigned, bibrefs_to_confirm): ''' Generate overview to let user chose the name on the paper that resembles the person in question. @param bibrefs_auto_assigned: list of auto-assigned papers @type bibrefs_auto_assigned: list @param bibrefs_to_confirm: list of unclear papers and names @type bibrefs_to_confirm: list ''' html = [] h = html.append h('
') h('

' + self._("Make sure we match the right names!") + '

') h('

' + self._('Please select an author on each of the records that will be assigned.') + '
') h(self._('Papers without a name selected will be ignored in the process.')) h('

') for person in bibrefs_to_confirm: if not "bibrecs" in bibrefs_to_confirm[person]: continue h((self._("Select name for") + " %s") % bibrefs_to_confirm[person]["person_name"]) pid = person for recid in bibrefs_to_confirm[person]["bibrecs"]: h('
') try: fv = get_fieldvalues(int(recid), "245__a")[0] except (ValueError, IndexError, TypeError): fv = self._('Error retrieving record title') h(self._("Paper title: ") + fv) h('') h("
") if bibrefs_auto_assigned: h(self._('The following names have been automatically chosen:')) for person in bibrefs_auto_assigned: if not "bibrecs" in bibrefs_auto_assigned[person]: continue h((self._("For") + " %s:") % bibrefs_auto_assigned[person]["person_name"]) pid = person for recid in bibrefs_auto_assigned[person]["bibrecs"]: try: fv = get_fieldvalues(int(recid), "245__a")[0] except (ValueError, IndexError, TypeError): fv = self._('Error retrieving record title') h('
') h(('%s' + self._(' -- With name: ')) % (fv)) #, bibrefs_auto_assigned[person]["bibrecs"][recid][0][1])) # asbibref = "%s||%s" % (person, bibrefs_auto_assigned[person]["bibrecs"][recid][0][0]) pbibref = bibrefs_auto_assigned[person]["bibrecs"][recid][0][0] h('') # h('' # % (recid, asbibref)) h('
') h('
') h(' ') h(' ') h("
") h('
') return "\n".join(html) def tmpl_invenio_search_box(self): ''' Generate little search box for missing papers. Links to main invenio search on start papge. ''' html = [] h = html.append h('
Search for missing papers:
' % CFG_SITE_URL) h(' ') h('') h('
') return "\n".join(html) def tmpl_person_menu(self): ''' Generate the menu bar ''' html = [] h = html.append h('
') h(' ') h('
') return "\n".join(html) def tmpl_person_menu_admin(self): ''' Generate the menu bar ''' html = [] h = html.append h('
') h(' ') h('
') return "\n".join(html) def tmpl_ticket_final_review(self, req, mark_yours=[], mark_not_yours=[], mark_theirs=[], mark_not_theirs=[]): ''' Generate final review page. Displaying transactions if they need confirmation. @param req: Apache request object @type req: Apache request object @param mark_yours: papers marked as 'yours' @type mark_yours: list @param mark_not_yours: papers marked as 'not yours' @type mark_not_yours: list @param mark_theirs: papers marked as being someone else's @type mark_theirs: list @param mark_not_theirs: papers marked as NOT being someone else's @type mark_not_theirs: list ''' def html_icon_legend(): html = [] h = html.append h('
') h("

") h(self._("Symbols legend: ")) h("

") h('') h('' % (CFG_SITE_URL, self._("Everything is shiny, captain!"))) h(self._('The result of this request will be visible immediately')) h('
') h('') h('' % (CFG_SITE_URL, self._("Confirmation needed to continue"))) h(self._('The result of this request will be visible immediately but we need your confirmation to do so for this paper has been manually claimed before')) h('
') h('') h('' % (CFG_SITE_URL, self._("This will create a change request for the operators"))) h(self._("The result of this request will be visible upon confirmation through an operator")) h("") h("
") return "\n".join(html) def mk_ticket_row(ticket): recid = -1 rectitle = "" recauthor = "No Name Found." personname = "No Name Found." try: recid = ticket['bibref'].split(",")[1] except (ValueError, KeyError, IndexError): return "" try: rectitle = get_fieldvalues(int(recid), "245__a")[0] except (ValueError, IndexError, TypeError): rectitle = self._('Error retrieving record title') if "authorname_rec" in ticket: recauthor = ticket['authorname_rec'] if "person_name" in ticket: personname = ticket['person_name'] html = [] h = html.append # h("Debug: " + str(ticket) + "
") h(' ') h('') h(rectitle) h('') h('') h((personname + " (" + self._("Selected name on paper") + ": %s)") % recauthor) h('') h('') if ticket['status'] == "granted": h('' % (CFG_SITE_URL, self._("Everything is shiny, captain!"))) elif ticket['status'] == "warning_granted": h('' % (CFG_SITE_URL, self._("Verification needed to continue"))) else: h('' % (CFG_SITE_URL, self._("This will create a request for the operators"))) h('') h('') h('' 'Cancel' '' % (CFG_SITE_URL, ticket['bibref'])) h('') return "\n".join(html) session = get_session(req) pinfo = session["personinfo"] ulevel = pinfo["ulevel"] html = [] h = html.append # h(html_icon_legend()) if "checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"]: h(self.tmpl_error_box(self._("Please Check your entries"), self._("Sorry."))) if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "tickets" in pinfo["checkout_faulty_fields"]): h(self.tmpl_error_box(self._("Please provide at least one transaction."), self._("Error:"))) # h('
' + # self._('Almost done! Please use the button "Confirm these changes" ' # 'at the end of the page to send this request to an operator ' # 'for review!') + '
') h('
') h("

" + self._('Please provide your information') + "

") h('
' % (CFG_SITE_URL)) if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_first_name" in pinfo["checkout_faulty_fields"]): h("

" + self._('Please provide your first name') + "

") h("

") if "user_first_name_sys" in pinfo and pinfo["user_first_name_sys"]: h((self._("Your first name:") + " %s") % pinfo["user_first_name"]) else: h(self._('Your first name:') + ' ' % pinfo["user_first_name"]) if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_last_name" in pinfo["checkout_faulty_fields"]): h("

" + self._('Please provide your last name') + "

") h("

") if "user_last_name_sys" in pinfo and pinfo["user_last_name_sys"]: h((self._("Your last name:") + " %s") % pinfo["user_last_name"]) else: h(self._('Your last name:') + ' ' % pinfo["user_last_name"]) h("

") if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_email" in pinfo["checkout_faulty_fields"]): h("

" + self._('Please provide your eMail address') + "

") if ("checkout_faulty_fields" in pinfo and pinfo["checkout_faulty_fields"] and "user_email_taken" in pinfo["checkout_faulty_fields"]): h("

" + self._('This eMail address is reserved by a user. Please log in or provide an alternative eMail address') + "

") h("

") if "user_email_sys" in pinfo and pinfo["user_email_sys"]: h((self._("Your eMail:") + " %s") % pinfo["user_email"]) else: h((self._('Your eMail:') + ' ') % pinfo["user_email"]) h("

") h(self._("You may leave a comment (optional)") + ":
") h('") h("

") h("

 

") h('
') h((' ') % self._("Continue claiming*")) h((' ') % self._("Confirm these changes**")) h('') h((' ') % self._("!Delete the entire request!")) h('') h('
') h("
") h('
') h('
') h('') if not ulevel == "guest": h('') h("") h('') if mark_yours: for idx, ticket in enumerate(mark_yours): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('') h("") h('') if mark_not_yours: for idx, ticket in enumerate(mark_not_yours): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('') h("") h('') if mark_theirs: for idx, ticket in enumerate(mark_theirs): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('') h("") h('') if mark_not_theirs: for idx, ticket in enumerate(mark_not_theirs): h('' % ((idx + 1) % 2)) h(mk_ticket_row(ticket)) h('') else: h('') h('') h('') h("") h('

" + self._('Mark as your documents') + "

 Nothing staged as yours

" + self._("Mark as _not_ your documents") + "

 ' + self._('Nothing staged as not yours') + '

" + self._('Mark as their documents') + "

 ' + self._('Nothing staged in this category') + '

" + self._('Mark as _not_ their documents') + "

 ' + self._('Nothing staged in this category') + '
') h("
") h("

") h(self._(" * You can come back to this page later. Nothing will be lost.
")) h(self._(" ** Performs all requested changes. Changes subject to permission restrictions " "will be submitted to an operator for manual review.")) h("

") h(html_icon_legend()) return "\n".join(html) def tmpl_author_search(self, query, results, search_ticket=None, author_pages_mode=True, fallback_mode=False, fallback_title='', fallback_message='', new_person_link=False): ''' Generates the search for Person entities. @param query: the query a user issued to the search @type query: string @param results: list of results @type results: list @param search_ticket: search ticket object to inform about pending claiming procedure @type search_ticket: dict ''' linktarget = "person" if author_pages_mode: linktarget = "author" if not query: query = "" html = [] h = html.append h('
') h('Find author clusters by name, e.g.: Ellis, J:
') h('' % query) h('') h('
') if fallback_mode: if fallback_title: h('' % fallback_title) if fallback_message: h('%s' % fallback_message) if not results and not query: h('') return "\n".join(html) h("

 

") if query and not results: authemail = CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL h(('' + self._("We do not have a publication list for '%s'." + " Try using a less specific author name, or check" + " back in a few days as attributions are updated " + "frequently. Or you can send us feedback, at ") + "%s.") % (query, authemail, authemail)) h('') if new_person_link: if search_ticket: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') for r in search_ticket['bibrefs']: link = link + '&selection=%s' % str(r) else: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') h('
') h('' % (link)) h(self._("Create a new Person for your search")) h('') h('
') return "\n".join(html) # base_color = 100 # row_color = 0 for index, result in enumerate(results): # if len(results) > base_color: # row_color += 1 # else: # row_color = base_color - (base_color - index * # (base_color / len(results))) pid = result[0] names = result[1] papers = result[2] try: total_papers = result[3] if total_papers > 1: papers_string = '(%s Papers)' % str(total_papers) elif total_papers == 1: if (len(papers) == 1 and len(papers[0]) == 1 and papers[0][0] == 'Not retrieved to increase performances.'): papers_string = '' else: papers_string = '(1 Paper)' else: papers_string = '(No papers)' except IndexError: papers_string = '' h('
' % (index % 2)) h('
') # h('%s. ' # % (row_color, row_color, row_color, index + 1)) h('%s. ' % (index + 1)) # for nindex, name in enumerate(names): # color = row_color + nindex * 35 # color = min(color, base_color) # h('%s; ' # % (color, color, color, name[0])) for name in names: h('%s ' % (name[0])) h('
') h('') if index < bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: h(('' ' ' + self._('Recent Papers') + '') % (pid)) else: h("") if search_ticket: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, pid) for r in search_ticket['bibrefs']: link = link + '&selection=%s' % str(r) h(('' '' '' + self._('YES!') + '' + self._(' Attribute Papers To ') + '%s %s ') % (link, get_person_redirect_link(pid), papers_string)) else: h(('' '' + self._('Publication List ') + '(%s) %s ') % (CFG_SITE_URL, linktarget, get_person_redirect_link(pid), get_person_redirect_link(pid), papers_string)) h('
' % (pid)) if papers and index < bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: h((self._('Showing the') + ' %d ' + self._('most recent documents:')) % len(papers)) h("
    ") for paper in papers: h("
  • %s
  • " % (format_record(paper[0], "ha"))) h("
") elif not papers: h("

" + self._('Sorry, there are no documents known for this person') + "

") elif index >= bconfig.PERSON_SEARCH_RESULTS_SHOW_PAPERS_PERSON_LIMIT: h("

" + self._('Information not shown to increase performances. Please refine your search.') + "

") h(('' '' + self._('Publication List ') + '(%s) (in a new window or tab)') % (CFG_SITE_URL, linktarget, get_person_redirect_link(pid), get_person_redirect_link(pid))) h('
') h('
') if new_person_link: if search_ticket: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') for r in search_ticket['bibrefs']: link = link + '&selection=%s' % str(r) else: link = "%s/person/action?confirm=True&pid=%s" % (CFG_SITE_URL, '-3') h('
') h('' % (link)) h(self._("Create a new Person for your search")) h('') h('
') return "\n".join(html) def tmpl_welcome_start(self): ''' Shadows the behaviour of tmpl_search_pagestart ''' return '
' def tmpl_welcome_arxiv(self): ''' SSO landing/welcome page. ''' html = [] h = html.append h('

Congratulations! You have now successfully connected to INSPIRE via arXiv.org!

') h('

Right now, you can verify your' ' publication records, which will help us to produce better publication lists and' ' citation statistics.' '

') h('

We are currently importing your publication list from arXiv.org.' ' When we\'re done, you\'ll see a link to verify your' ' publications below; please claim the papers that are yours ' 'and remove the ones that are not. This information will be automatically processed' ' or sent to our operator for approval if needed, usually within 24' ' hours.' '

') h('If you have ' 'any questions or encounter any problems, please contact us here: ' '%s

' % (CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL, CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL)) return "\n".join(html) def tmpl_welcome(self): ''' SSO landing/welcome page. ''' html = [] h = html.append h('

Congratulations! You have successfully logged in!

') h('

We are currently creating your publication list. When we\'re done, you\'ll see a link to correct your ' 'publications below.

') h('

When the link appears, we invite you to confirm the papers that are ' 'yours and to reject the ones of which you are not an author. If you have ' 'any questions or encounter any problems, please contact us here: ' '%s

' % (CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL, CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL)) return "\n".join(html) def tmpl_claim_stub(self, person='-1'): ''' claim stub page ''' html = [] h = html.append h('
  • Login through arXiv.org ' % bconfig.BIBAUTHORID_CFG_INSPIRE_LOGIN) - h(' - Use this option if you have an arXiv account.') + h(' - Use this option if you have an arXiv account and have claimed your papers in arXiv.') h('(If you login through arXiv.org, INSPIRE will immediately verify you as an author and process your claimed papers.)

    ') h('
  • Continue as a guest ' % (CFG_SITE_URL, person)) h(' - Use this option if you DON\'T have an arXiv account, or you have not claimed any paper in arXiv.') h('(If you login as a guest, INSPIRE will need to confirm you as an author before processing your claimed papers.)

    ') h('If you login through arXiv.org, we can verify that you are the author of these papers and accept your claims rapidly, ' 'as well as add additional claims from arXiv.
    If you choose not to login via arXiv, your changes will ' 'be publicly visible only after our editors check and confirm them, usually within a few days.
    ' 'Either way, claims made on behalf of another author will go through our staff and may take longer to display. ' 'This also applies to papers which have been previously claimed, by yourself or by someone else.') return "\n".join(html) def tmpl_welcome_link(self): ''' Creates the link for the actual user action. ''' return '' + \ self._('Correct my publication lists!') + \ '' def tmpl_welcome_personid_association(self, pid): """ Describe the association between the user's arXiv.org account and the person profile. """ canon_name = get_canonical_id_from_personid(pid) head = "
    " if canon_name: body = ("Your arXiv.org account is associated " "with person %s." % canon_name[0][0]) else: body = ("Warning: your arXiv.org account is associated with an empty profile. " "This can happen if it the first time you log in and you do not have any " "paper directly claimen in arXiv.org which we can use to identify you." "If this is the case, you are welcome to search and claim your papers to your" " new profile manually, or please contact us to get help.") body += ("
    You are very welcome to contact us should you need any help or an explanation" " about the management of" " your profile page" " in INSPIRE and its connections with arXiv.org: ''' authors@inspirehep.net ''') tail = "
    " return head + body + tail def tmpl_welcome_arXiv_papers(self, paps): ''' Creates the list of arXiv papers ''' plist = "

    " if paps: plist = plist + "We have got and we are about to automatically claim for You the following papers from arXiv.org:
    " for p in paps: plist = plist + " " + str(p) + "
    " else: plist = "We have got no papers from arXiv.org which we could claim automatically for You.
    " return plist def tmpl_welcome_end(self): ''' Shadows the behaviour of tmpl_search_pageend ''' return '
' def tmpl_tickets_admin(self, tickets=[]): ''' Open tickets short overview for operators. ''' html = [] h = html.append if len(tickets) > 0: h('List of open tickets:

') for t in tickets: h(' %(longname)s - (%(cname)s - PersonID: %(pid)s): %(num)s open tickets.
' % ({'cname':str(t[1]), 'longname':str(t[0]), 'pid':str(t[2]), 'num':str(t[3])})) else: h('There are currently no open tickets.') return "\n".join(html) # pylint: enable=C0301
diff --git a/modules/bibauthorid/lib/bibauthorid_tortoise.py b/modules/bibauthorid/lib/bibauthorid_tortoise.py
index f2735ddf4..39ac25a89 100644
--- a/modules/bibauthorid/lib/bibauthorid_tortoise.py
+++ b/modules/bibauthorid/lib/bibauthorid_tortoise.py
@@ -1,113 +1,201 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

-from operator import itemgetter
-from itertools import groupby
+import bibauthorid_config as bconfig
+from datetime import datetime

-from bibauthorid_cluster_set import Cluster_set
-from bibauthorid_cluster_set import cluster_sets_from_marktables
+from bibauthorid_cluster_set import delayed_cluster_sets_from_marktables
+from bibauthorid_cluster_set import delayed_cluster_sets_from_personid
from bibauthorid_wedge import wedge
from bibauthorid_name_utils import generate_last_name_cluster_str
-from bibauthorid_backinterface import get_all_names_from_personid
-from bibauthorid_backinterface import in_results
+from bibauthorid_backinterface import empty_results_table
+from bibauthorid_backinterface import remove_result_cluster
from bibauthorid_general_utils import bibauthor_print
-from bibauthorid_scheduler import schedule
+from bibauthorid_prob_matrix import prepare_matirx
+from bibauthorid_scheduler import schedule \
+    , Estimator \
+    , matrix_coefs \
+    , wedge_coefs
+'''
+    There are three main entry points to tortoise:
+
+    i) tortoise
+        Performs a disambiguation iteration.
+        The argument pure indicates whether to use
+        the claims and the rejections or not.
+        Use pure=True only to test the accuracy of tortoise.
+
+    ii) tortoise_from_scratch
+        NOT RECOMMENDED!
+        Use this function only if you have just
+        installed Invenio and this is your first
+        disambiguation, or if personid is broken.
+
+    iii) tortoise_last_name
+        Computes the clusters for only one last name
+        group. It is primarily used for testing. It
+        may also be used to fix a broken last name
+        cluster. It does not involve multiprocessing,
+        so it is convenient to debug with pdb.
+'''
+
+# Exit codes:
+# The standard ones are not well documented,
+# so we are using random numbers.
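For orientation, here is a minimal usage sketch of the three entry points described in the module docstring above. This is illustrative only: it assumes the bibauthorid modules are on the import path and that the personid tables are already populated; the call signatures follow the definitions introduced below.

    from bibauthorid_tortoise import (tortoise,
                                      tortoise_from_scratch,
                                      tortoise_last_name)

    # Regular disambiguation iteration, honouring claims and rejections.
    tortoise(pure=False)

    # Full rebuild -- only for a fresh installation or a broken personid.
    # tortoise_from_scratch()

    # Recompute a single last-name cluster in one process (pdb-friendly).
    tortoise_last_name('Ellis, J')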
def tortoise_from_scratch():
-    cluster_sets = ((cs, sum(len(c.bibs) for c in cs.clusters))
-                    for cs in cluster_sets_from_marktables())
-    cluster_sets = sorted(cluster_sets, key=itemgetter(1))
-    args = [(x[0], ) for x in cluster_sets]
-    sizs = map(itemgetter(1), cluster_sets)
+    bibauthor_print("Preparing cluster sets.")
+    cluster_sets, lnames, sizes = delayed_cluster_sets_from_marktables()
+    bibauthor_print("Building all matrices.")
+    exit_statuses = schedule_create_matrix(
+        cluster_sets,
+        sizes,
+        force=True)
+    assert len(exit_statuses) == len(cluster_sets)
+
+    empty_results_table()
+
+    bibauthor_print("Preparing cluster sets.")
+    cluster_sets, lnames, sizes = delayed_cluster_sets_from_marktables()
+    bibauthor_print("Starting disambiguation.")
+    exit_statuses = schedule_wedge_and_store(
+        cluster_sets,
+        sizes)
+    assert len(exit_statuses) == len(cluster_sets)
+
+
+def tortoise(pure=False,
+             force_matrix_creation=False,
+             skip_matrix_creation=False,
+             last_run=None):
+    assert not force_matrix_creation or not skip_matrix_creation
+    # The computation must be forced in case we want
+    # to compute pure results
+    force_matrix_creation = force_matrix_creation or pure
+
+    if not skip_matrix_creation:
+        bibauthor_print("Preparing cluster sets.")
+        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure, last_run)
+        bibauthor_print("Building all matrices.")
+        exit_statuses = schedule_create_matrix(
+            clusters,
+            sizes,
+            force=force_matrix_creation)
+        assert len(exit_statuses) == len(clusters)
+
+    bibauthor_print("Preparing cluster sets.")
+    clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure, last_run)
+    bibauthor_print("Starting disambiguation.")
+    exit_statuses = schedule_wedge_and_store(
+        clusters,
+        sizes)
+    assert len(exit_statuses) == len(clusters)
+
+
+def tortoise_last_name(name, from_mark=False, pure=False):
+    assert not(from_mark and pure)
-    schedule(disambiguate, args, sizs)
-
-
-def tortoise(pure=False, only_missing=False):
-    names = create_lastname_list_from_personid()
-    names = sorted(names, key=itemgetter(2))
-    args = [(x[1], x[0], pure, only_missing) for x in names]
-    sizs = map(itemgetter(2), names)
-
-    schedule(disambiguate_last_name, args, sizs)
-
-
-def tortoise_last_name(name, pure=False):
    lname = generate_last_name_cluster_str(name)
-    names = create_lastname_list_from_personid()
-    names = filter(lambda x: x[0] == name, names)
-
-    if names:
-        pids = names[0][1]
-        bibauthor_print("Found %s(%s), %d pids" % (name, lname, len(pids)))
-        disambiguate_last_name(pids, lname, pure, False)
+    if from_mark:
+        clusters, lnames, sizes = delayed_cluster_sets_from_marktables()
    else:
+        clusters, lnames, sizes = delayed_cluster_sets_from_personid(pure)
+
+    try:
+        idx = lnames.index(lname)
+        cluster = clusters[idx]
+        size = sizes[idx]
+        bibauthor_print("Found, %s(%s). Total number of bibs: %d." % (name, lname, size))
+        cluster_set = cluster()
+        create_matrix(cluster_set, True)
+        wedge_and_store(cluster_set)
+    except IndexError:
        bibauthor_print("Sorry, %s(%s) not found in the last name clusters" % (name, lname))

-def create_lastname_list_from_personid():
-    '''
-    This function generates a dictionary from a last name
-    to list of personids which have this lastname.
-    '''
-    # ((personid, [full Name1], Nbibs) ... )
-    all_names = get_all_names_from_personid()
-
-    # ((personid, last_name, Nbibs) ... )
-    all_names = ((row[0], generate_last_name_cluster_str(iter(row[1]).next()), row[2])
-                 for row in all_names)
-
-    # { (last_name, [(personid)... ], Nbibs) ... }
-    all_names = groupby(sorted(all_names, key=itemgetter(1)), key=itemgetter(1))
-    all_names = ((key, list(data)) for key, data in all_names)
-    all_names = ((key, map(itemgetter(0), data), sum(x[2] for x in data)) for key, data in all_names)
-
-    return all_names

+def create_matrix(cluster_set, force):
+    bibs = cluster_set.num_all_bibs
+    expected = bibs * (bibs - 1) / 2
+    bibauthor_print("Start building matrix for %s. Total number of bibs: %d, "
+                    "maximum number of comparisons: %d"
+                    % (cluster_set.last_name, bibs, expected))
+    return prepare_matirx(cluster_set, force)

-def disambiguate_last_name(personids, last_name, pure, only_missing):
-    '''
-    Creates a cluster_set from personid and calls disambiguate.
-    '''
-    if only_missing and in_results(last_name):
-        return
-    cs = Cluster_set()
-    if pure:
-        cs.create_pure(personids, last_name)
-    else:
-        cs.create_skeleton(personids, last_name)
-    disambiguate(cs)

+def force_create_matrix(cluster_set, force):
+    bibauthor_print("Building a cluster set.")
+    return create_matrix(cluster_set(), force)

-def disambiguate(cluster_set):
-    '''
-    Updates personid from a list of personids, sharing common
-    last name, and this last name.
-    '''
-    bibs = sum(len(c.bibs) for c in cluster_set.clusters)
+def wedge_and_store(cluster_set):
+    bibs = cluster_set.num_all_bibs
    expected = bibs * (bibs - 1) / 2
    bibauthor_print("Start working on %s. Total number of bibs: %d, "
                    "maximum number of comparisons: %d"
-          % (cluster_set.last_name, bibs, expected))
+                    % (cluster_set.last_name, bibs, expected))
    wedge(cluster_set)
+    remove_result_cluster(cluster_set.last_name)
    cluster_set.store()
-
+    return True
+
+
+def force_wedge_and_store(cluster_set):
+    bibauthor_print("Building a cluster set.")
+    return wedge_and_store(cluster_set())
+
+
+def schedule_create_matrix(cluster_sets, sizes, force):
+    def create_job(cluster):
+        def ret():
+            return force_create_matrix(cluster, force)
+        return ret
+
+    memfile_path = None
+    if bconfig.DEBUG_PROCESS_PEAK_MEMORY:
+        tt = datetime.now()
+        tt = (tt.hour, tt.minute, tt.day, tt.month, tt.year)
+        memfile_path = ('%smatrix_memory_%d:%d_%d-%d-%d.log' %
+                        ((bconfig.TORTOISE_FILES_PATH,) + tt))
+
+    return schedule(map(create_job, cluster_sets),
+                    sizes,
+                    Estimator(matrix_coefs),
+                    memfile_path)
+
+
+def schedule_wedge_and_store(cluster_sets, sizes):
+    def create_job(cluster):
+        def ret():
+            return force_wedge_and_store(cluster)
+        return ret
+
+    memfile_path = None
+    if bconfig.DEBUG_PROCESS_PEAK_MEMORY:
+        tt = datetime.now()
+        tt = (tt.hour, tt.minute, tt.day, tt.month, tt.year)
+        memfile_path = ('%swedge_memory_%d:%d_%d-%d-%d.log' %
+                        ((bconfig.TORTOISE_FILES_PATH,) + tt))
+
+    return schedule(map(create_job, cluster_sets),
+                    sizes,
+                    Estimator(wedge_coefs),
+                    memfile_path)
diff --git a/modules/bibauthorid/lib/bibauthorid_webapi.py b/modules/bibauthorid/lib/bibauthorid_webapi.py
index 2294c3c19..bc2550a94 100644
--- a/modules/bibauthorid/lib/bibauthorid_webapi.py
+++ b/modules/bibauthorid/lib/bibauthorid_webapi.py
@@ -1,1393 +1,1409 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' Bibauthorid_webapi Point of access to the documents clustering facility. Provides utilities to safely interact with stored data. ''' import invenio.bibauthorid_config as bconfig import invenio.bibauthorid_frontinterface as dbapi import invenio.bibauthorid_name_utils as nameapi import invenio.search_engine as search_engine from search_engine import perform_request_search from cgi import escape from time import gmtime, strftime, ctime from invenio.dbquery import OperationalError from invenio.access_control_admin import acc_find_user_role_actions from invenio.webuser import collect_user_info, get_session, getUid from invenio.webuser import isUserSuperAdmin from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import acc_get_role_id, acc_get_user_roles from invenio.external_authentication_robot import ExternalAuthRobot from invenio.external_authentication_robot import load_robot_keys from invenio.config import CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL from invenio.config import CFG_SITE_URL from invenio.mailutils import send_email +from invenio.bibauthorid_dbinterface import get_personiID_external_ids #export + def get_person_redirect_link(pid): ''' Returns the canonical name of a pid if found, the pid itself otherwise @param pid: int ''' cname = dbapi.get_canonical_id_from_personid(pid) if len(cname) > 0: return str(cname[0][0]) else: return str(pid) def update_person_canonical_name(person_id, canonical_name, userinfo=''): ''' Updates a person's canonical name @param person_id: person id @param canonical_name: string ''' + if userinfo.count('||'): + uid = userinfo.split('||')[0] + else: + uid = '' dbapi.update_personID_canonical_names((person_id,), overwrite=True, suggested=canonical_name) - dbapi.insert_user_log(userinfo, person_id, 'data_update', 'CMPUI_changecanonicalname', '', 'Canonical name manually updated.') + dbapi.insert_user_log(userinfo, person_id, 'data_update', 'CMPUI_changecanonicalname', '', 'Canonical name manually updated.', userid=uid) def get_canonical_id_from_person_id(person_id): ''' Finds the person canonical name from personid (e.g. 1) @param person_id: the canonical ID @type person_id: string @return: result from the request or person_id on failure @rtype: int ''' if not person_id or not (isinstance(person_id, str) or isinstance(person_id, int)): return person_id canonical_name = person_id try: canonical_name = dbapi.get_canonical_id_from_personid(person_id)[0][0] except IndexError: pass return canonical_name def get_person_id_from_canonical_id(canonical_id): ''' Finds the person id from a canonical name (e.g. 
Ellis_J_R_1) @param canonical_id: the canonical ID @type canonical_id: string @return: result from the request or -1 on failure @rtype: int ''' if not canonical_id or not isinstance(canonical_id, str): return -1 pid = -1 try: pid = dbapi.get_person_id_from_canonical_id(canonical_id)[0][0] except IndexError: pass return pid def get_bibrefs_from_bibrecs(bibreclist): ''' Retrieve all bibrefs for all the recids in the list @param bibreclist: list of record IDs @type bibreclist: list of int @return: a list of record->bibrefs @return: list of lists ''' return [[bibrec, dbapi.get_possible_bibrecref([''], bibrec, always_match=True)] for bibrec in bibreclist] def get_possible_bibrefs_from_pid_bibrec(pid, bibreclist, always_match=False, additional_names=None): ''' Returns for each bibrec a list of bibrefs for which the surname matches. @param pid: person id to gather the names strings from @param bibreclist: list of bibrecs on which to search @param always_match: match all bibrefs no matter the name @param additional_names: [n1,...,nn] names to match other then the one from personid ''' pid = wash_integer_id(pid) pid_names = dbapi.get_person_db_names_set(pid) if additional_names: pid_names += zip(additional_names) lists = [] for bibrec in bibreclist: lists.append([bibrec, dbapi.get_possible_bibrecref([n[0] for n in pid_names], bibrec, always_match)]) return lists def get_pid_from_uid(uid): ''' Return the PID associated with the uid @param uid: the internal ID of a user @type uid: int @return: the Person ID attached to the user or -1 if none found ''' if not isinstance(uid, tuple): uid = ((uid,),) return dbapi.get_personid_from_uid(uid) def get_user_level(uid): ''' Finds and returns the aid-universe-internal numeric user level @param uid: the user's id @type uid: int @return: A numerical representation of the maximum access level of a user @rtype: int ''' actions = [row[1] for row in acc_find_user_role_actions({'uid': uid})] return max([dbapi.resolve_paper_access_right(acc) for acc in actions]) def get_person_id_from_paper(bibref=None): ''' Returns the id of the person who wrote the paper @param bibref: the bibref,bibrec pair that identifies the person @type bibref: str @return: the person id @rtype: int ''' if not is_valid_bibref(bibref): return -1 person_id = -1 db_data = dbapi.get_papers_status(bibref) try: person_id = db_data[0][1] except (IndexError): pass return person_id def get_papers_by_person_id(person_id= -1, rec_status= -2, ext_out=False): ''' Returns all the papers written by the person @param person_id: identifier of the person to retrieve papers from @type person_id: int @param rec_status: minimal flag status a record must have to be displayed @type rec_status: int @param ext_out: Extended output (w/ author aff and date) @type ext_out: boolean @return: list of record ids @rtype: list of int ''' if not isinstance(person_id, int): try: person_id = int(person_id) except (ValueError, TypeError): return [] if person_id < 0: return [] if not isinstance(rec_status, int): return [] records = [] db_data = dbapi.get_person_papers(person_id, rec_status, show_author_name=True, show_title=False, show_rt_status=True, show_affiliations=ext_out, show_date=ext_out, show_experiment=ext_out) if not ext_out: records = [[row["data"].split(",")[1], row["data"], row["flag"], row["authorname"]] for row in db_data] else: for row in db_data: recid = row["data"].split(",")[1] bibref = row["data"] flag = row["flag"] authorname = row["authorname"] rt_status = row['rt_status'] authoraff = ", 
".join(row['affiliation']) try: date = sorted(row['date'], key=len)[0] except IndexError: date = "Not available" exp = ", ".join(row['experiment']) #date = "" records.append([recid, bibref, flag, authorname, authoraff, date, rt_status, exp]) return records def get_papers_cluster(bibref): ''' Returns the cluster of documents connected with this one @param bibref: the table:bibref,bibrec pair to look for @type bibref: str @return: a list of record IDs @rtype: list of int ''' papers = [] person_id = get_person_id_from_paper(bibref) if person_id > -1: papers = get_papers_by_person_id(person_id) return papers def get_person_request_ticket(pid= -1, tid=None): ''' Returns the list of request tickets associated to a person. @param pid: person id @param tid: ticket id, to select if want to retrieve only a particular one @return: tickets [[],[]] ''' if pid < 0: return [] else: return dbapi.get_request_ticket(pid, ticket_id=tid) def get_persons_with_open_tickets_list(): ''' Finds all the persons with open tickets and returns pids and count of tickets @return: [[pid,ticket_count]] ''' return dbapi.get_persons_with_open_tickets_list() def get_person_names_from_id(person_id= -1): ''' Finds and returns the names associated with this person along with the frequency of occurrence (i.e. the number of papers) @param person_id: an id to find the names for @type person_id: int @return: name and number of occurrences of the name @rtype: tuple of tuple ''' # #retrieve all rows for the person if (not person_id > -1) or (not isinstance(person_id, int)): return [] return dbapi.get_person_names_count(person_id) def get_person_db_names_from_id(person_id= -1): ''' Finds and returns the names associated with this person as stored in the meta data of the underlying data set along with the frequency of occurrence (i.e. the number of papers) @param person_id: an id to find the names for @type person_id: int @return: name and number of occurrences of the name @rtype: tuple of tuple ''' # #retrieve all rows for the person if (not person_id > -1) or (not isinstance(person_id, int)): return [] return dbapi.get_person_db_names_count(person_id) def get_longest_name_from_pid(person_id= -1): ''' Finds the longest name of a person to be representative for this person. @param person_id: the person ID to look at @type person_id: int @return: returns the longest normalized name of a person @rtype: string ''' if (not person_id > -1) or (not isinstance(person_id, int)): return "This doesn't look like a person ID!" longest_name = "" for name in dbapi.get_person_names_count(person_id): if name and len(name[0]) > len(longest_name): longest_name = name[0] if longest_name: return longest_name else: return "This person does not seem to have a name!" def get_most_frequent_name_from_pid(person_id= -1, allow_none=False): ''' Finds the most frequent name of a person to be representative for this person. @param person_id: the person ID to look at @type person_id: int @return: returns the most frequent normalized name of a person @rtype: string ''' pid = wash_integer_id(person_id) if (not pid > -1) or (not isinstance(pid, int)): if allow_none: return None else: return "'%s' doesn't look like a person ID!" % person_id person_id = pid mf_name = "" try: nn = dbapi.get_person_names_count(person_id) mf_name = sorted(nn, key=lambda k:k[1], reverse=True)[0][0] except IndexError: pass if mf_name: return mf_name else: if allow_none: return None else: return "This person does not seem to have a name!" 
def get_paper_status(bibref): ''' Finds an returns the status of a bibrec to person assignment @param bibref: the bibref-bibrec pair that unambiguously identifies a paper @type bibref: string ''' db_data = dbapi.get_papers_status(bibref) #data,PersonID,flag status = None try: status = db_data[0][2] except IndexError: status = -10 status = wash_integer_id(status) return status def wash_integer_id(param_id): ''' Creates an int out of either int or string @param param_id: the number to be washed @type param_id: int or string @return: The int representation of the param or -1 @rtype: int ''' pid = -1 try: pid = int(param_id) except (ValueError, TypeError): return (-1) return pid def is_valid_bibref(bibref): ''' Determines if the provided string is a valid bibref-bibrec pair @param bibref: the bibref-bibrec pair that unambiguously identifies a paper @type bibref: string @return: True if it is a bibref-bibrec pair and False if it's not @rtype: boolean ''' if (not isinstance(bibref, str)) or (not bibref): return False if not bibref.count(":"): return False if not bibref.count(","): return False try: table = bibref.split(":")[0] ref = bibref.split(":")[1].split(",")[0] bibrec = bibref.split(":")[1].split(",")[1] except IndexError: return False try: table = int(table) ref = int(ref) bibrec = int(bibrec) except (ValueError, TypeError): return False return True def is_valid_canonical_id(cid): ''' Checks if presented canonical ID is valid in structure Must be of structure: ([Initial|Name]\.)*Lastname\.Number Example of valid cid: J.Ellis.1 @param cid: The canonical ID to check @type cid: string @return: Is it valid? @rtype: boolean ''' if not cid.count("."): return False xcheck = -1 sp = cid.split(".") if not (len(sp) > 1 and sp[-1]): return False try: xcheck = int(sp[-1]) except (ValueError, TypeError, IndexError): return False if xcheck and xcheck > -1: return True else: return False #def confirm_person_bibref_assignments(person_id, bibrefs, uid): # ''' # Confirms a bibref-bibrec assignment to a person. That internally # raises the flag of the entry to 2, which means 'user confirmed' and # sets the user level to the highest level of the user provided as param # # @param person_id: the id of the person to confirm the assignment to # @type person_id: int # @param bibrefs: the bibref-bibrec pairs that unambiguously identify records # @type bibrefs: list of strings # @param uid: the id of the user that arranges the confirmation # @type uid: int # # @return: True if the process ran smoothly, False if there was an error # @rtype: boolean # ''' # pid = wash_integer_id(person_id) # refs = [] # # if pid < 0: # return False # # if not isinstance(bibrefs, list) or not len(bibrefs): # return False # else: # for bibref in bibrefs: # if is_valid_bibref(bibref): # refs.append((bibref,)) # else: # return False # # try: # dbapi.confirm_papers_to_person((pid,), refs, get_user_level(uid)) # except OperationalError: # return False # # return True # # #def repeal_person_bibref_assignments(person_id, bibrefs, uid): # ''' # Repeals a bibref-bibrec assignment from a person. 
That internally # sets the flag of the entry to -2, which means 'user repealed' and # sets the user level to the highest level of the user provided as param # # @param person_id: the id of the person to repeal the assignment from # @type person_id: int # @param bibrefs: the bibref-bibrec pairs that unambiguously identify records # @type bibrefs: list of strings # @param uid: the id of the user that arranges the repulsion # @type uid: int # # @return: True if the process ran smoothly, False if there was an error # @rtype: boolean # ''' # pid = wash_integer_id(person_id) # refs = [] # # if pid < 0: # return False # # if not isinstance(bibrefs, list) or not len(bibrefs): # return False # else: # for bibref in bibrefs: # if is_valid_bibref(bibref): # refs.append((bibref,)) # else: # return False # # try: # dbapi.reject_papers_from_person((pid,), refs, get_user_level(uid)) # except OperationalError: # return False # # return True # # #def reset_person_bibref_decisions(person_id, bibrefs): # ''' # Resets a bibref-bibrec assignment of a person. That internally # sets the flag of the entry to 0, which means 'no user interaction' and # sets the user level to 0 to give the record free for claiming/curation # # @param person_id: the id of the person to reset the assignment from # @type person_id: int # @param bibrefs: the bibref-bibrec pairs that unambiguously identify records # @type bibrefs: list of strings # # @return: True if the process ran smoothly, False if there was an error # @rtype: boolean # ''' # pid = wash_integer_id(person_id) # refs = [] # # if pid < 0: # return False # # if not isinstance(bibrefs, list) or not len(bibrefs): # return False # else: # for bibref in bibrefs: # if is_valid_bibref(bibref): # refs.append((bibref,)) # else: # return False # # try: # dbapi.reset_papers_flag((person_id,), refs) # except OperationalError: # return False # # return True # # def add_person_comment(person_id, message): ''' Adds a comment to a person after enriching it with meta-data (date+time) @param person_id: person id to assign the comment to @type person_id: int @param message: defines the comment to set @type message: string @return the message incl. the metadata if everything was fine, False on err @rtype: string or boolean ''' msg = "" pid = -1 try: msg = str(message) pid = int(person_id) except (ValueError, TypeError): return False strtimestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime()) msg = escape(msg, quote=True) dbmsg = "%s;;;%s" % (strtimestamp, msg) dbapi.set_person_data(pid, "comment", dbmsg) return dbmsg def get_person_comments(person_id): ''' Get all comments from a person @param person_id: person id to get the comments from @type person_id: int @return the message incl. 
the metadata if everything was fine, False on err @rtype: string or boolean ''' pid = -1 comments = [] try: pid = int(person_id) except (ValueError, TypeError): return False for row in dbapi.get_person_data(pid, "comment"): comments.append(row[1]) return comments def search_person_ids_by_name(namequery): ''' Prepares the search to search in the database @param namequery: the search query the user enquired @type namequery: string @return: information about the result w/ probability and occurrence @rtype: tuple of tuple ''' query = "" escaped_query = "" try: query = str(namequery) except (ValueError, TypeError): return [] if query: escaped_query = escape(query, quote=True) else: return [] return dbapi.find_personIDs_by_name_string(escaped_query) def insert_log(userinfo, personid, action, tag, value, comment='', transactionid=0): ''' Log an action performed by a user Examples (in the DB): 1 2010-09-30 19:30 admin||10.0.0.1 1 assign paper 1133:4442 'from 23' 1 2010-09-30 19:30 admin||10.0.0.1 1 assign paper 8147:4442 2 2010-09-30 19:35 admin||10.0.0.1 1 reject paper 72:4442 @param userinfo: information about the user [UID|IP] @type userinfo: string @param personid: ID of the person this action is targeting @type personid: int @param action: intended action @type action: string @param tag: A tag to describe the data entered @type tag: string @param value: The value of the action described by the tag @type value: string @param comment: Optional comment to describe the transaction @type comment: string @param transactionid: May group bulk operations together @type transactionid: int @return: Returns the current transactionid @rtype: int ''' userinfo = escape(str(userinfo)) action = escape(str(action)) tag = escape(str(tag)) value = escape(str(value)) comment = escape(str(comment)) if not isinstance(personid, int): try: personid = int(personid) except (ValueError, TypeError): return -1 if not isinstance(transactionid, int): try: transactionid = int(transactionid) except (ValueError, TypeError): return -1 + if userinfo.count('||'): + uid = userinfo.split('||')[0] + else: + uid = '' + return dbapi.insert_user_log(userinfo, personid, action, tag, - value, comment, transactionid) + value, comment, transactionid, userid=uid) def user_can_modify_data(uid, pid): ''' Determines if a user may modify the data of a person @param uid: the id of a user (invenio user id) @type uid: int @param pid: the id of a person @type pid: int @return: True if the user may modify data, False if not @rtype: boolean @raise ValueError: if the supplied parameters are invalid ''' if not isinstance(uid, int): try: uid = int(uid) except (ValueError, TypeError): raise ValueError("User ID has to be a number!") if not isinstance(pid, int): try: pid = int(pid) except (ValueError, TypeError): raise ValueError("Person ID has to be a number!") return dbapi.user_can_modify_data(uid, pid) def user_can_modify_paper(uid, paper): ''' Determines if a user may modify the record assignments of a person @param uid: the id of a user (invenio user id) @type uid: int @param pid: the id of a person @type pid: int @return: True if the user may modify data, False if not @rtype: boolean @raise ValueError: if the supplied parameters are invalid ''' if not isinstance(uid, int): try: uid = int(uid) except (ValueError, TypeError): raise ValueError("User ID has to be a number!") if not paper: raise ValueError("A bibref is expected!") return dbapi.user_can_modify_paper(uid, paper) def person_bibref_is_touched_old(pid, bibref): ''' Determines if an assignment 
has been touched by a user (i.e. it checks whether the flag of an assignment is 2 or -2) @param pid: the id of the person to check against @type pid: int @param bibref: the bibref-bibrec pair that unambiguously identifies a paper @type bibref: string @raise ValueError: if the supplied parameters are invalid ''' if not isinstance(pid, int): try: pid = int(pid) except (ValueError, TypeError): raise ValueError("Person ID has to be a number!") if not bibref: raise ValueError("A bibref is expected!") return dbapi.person_bibref_is_touched_old(pid, bibref) def get_review_needing_records(pid): ''' Returns the list of records associated to pid which are in need of review (only bibrec, no bibref selected) @param pid: pid ''' pid = wash_integer_id(pid) db_data = dbapi.get_person_papers_to_be_manually_reviewed(pid) return [int(row[1]) for row in db_data if row[1]] def add_review_needing_record(pid, bibrec_id): ''' Adds a record in need of review to a person @param pid: pid @param bibrec_id: bibrec ''' pid = wash_integer_id(pid) bibrec_id = wash_integer_id(bibrec_id) dbapi.add_person_paper_needs_manual_review(pid, bibrec_id) def del_review_needing_record(pid, bibrec_id): ''' Removes a record in need of review from a person @param pid: personid @param bibrec_id: bibrec ''' pid = wash_integer_id(pid) bibrec_id = wash_integer_id(bibrec_id) dbapi.del_person_papers_needs_manual_review(pid, bibrec_id) def get_processed_external_recids(pid): ''' Get the list of records that have been processed from external identifiers @param pid: Person ID to look up the info for @type pid: int @return: list of record IDs @rtype: list of strings ''' list_str = dbapi.get_processed_external_recids(pid) return list_str.split(";") def set_processed_external_recids(pid, recid_list): ''' Set the list of records that have been processed from external identifiers @param pid: Person ID to set the info for @type pid: int @param recid_list: list of recids @type recid_list: list of int ''' if isinstance(recid_list, list): recid_list_str = ";".join(recid_list) dbapi.set_processed_external_recids(pid, recid_list_str) def arxiv_login(req): ''' Log in through arXiv. If the user is already associated with a personid, returns that personid. If the user has no pid, tries to guess which personid to associate based on the surname and the papers from arXiv. If no compatible person is found, creates a new person. At the end of the process, opens a ticket for the user claiming the papers from arXiv. Note: the user will find the open ticket, which will require them to go through the final review before it gets committed.
    @param req: Apache request object
    @type req: Apache request object

    @return: Returns the pid resulting from the process
    @rtype: int
    '''
    def session_bareinit(req):
        session = get_session(req)
        try:
            pinfo = session["personinfo"]
            if 'ticket' not in pinfo:
                pinfo["ticket"] = []
        except KeyError:
            pinfo = dict()
            session['personinfo'] = pinfo
            pinfo["ticket"] = []
        session.save()

    session_bareinit(req)
    session = get_session(req)
    pinfo = session['personinfo']
    ticket = session['personinfo']['ticket']

    uinfo = collect_user_info(req)
    pinfo['external_first_entry'] = False

    try:
        name = uinfo['external_firstname']
    except KeyError:
        name = ''
    try:
        surname = uinfo['external_familyname']
    except KeyError:
        surname = ''

    if surname:
        session['personinfo']['arxiv_name'] = nameapi.create_normalized_name(
            nameapi.split_name_parts(surname + ', ' + name))
    else:
        session['personinfo']['arxiv_name'] = ''

    session.save()

    try:
        arxiv_p_ids = uinfo['external_arxivids'].split(';')
    except KeyError:
        arxiv_p_ids = []

    #'external_arxivids': 'hep-th/0112017;hep-th/0112020',
    #'external_familyname': 'Weiler',
    #'external_firstname': 'Henning',

    try:
        found_bibrecs = set(zip(*[perform_request_search(p='037:' + str(arx), of='id', rg=0)
                                  for arx in arxiv_p_ids])[0])
    except IndexError:
        found_bibrecs = set()

    #found_bibrecs = [567700, 567744]

    uid = getUid(req)
    pid, pid_found = dbapi.get_personid_from_uid([[uid]])

    if not pid_found:
        pid = dbapi.reclaim_personid_for_new_arXiv_user(found_bibrecs,
                  nameapi.create_normalized_name(nameapi.split_name_parts(surname + ', ' + name)),
                  uid)
    else:
        pid = pid[0]

    pid_bibrecs = set([i[0] for i in dbapi.get_all_personids_recs(pid, claimed_only=True)])
    missing_bibrecs = found_bibrecs - pid_bibrecs
    #present_bibrecs = found_bibrecs.intersection(pid_bibrecs)

    #assert len(found_bibrecs) == len(missing_bibrecs) + len(present_bibrecs)

    tempticket = []
    #now we have to open the tickets...
    #person_papers contains the papers which are already assigned to the person
    #and came from arXiv, they can be claimed regardless

    for bibrec in missing_bibrecs:
        tempticket.append({'pid': pid, 'bibref': str(bibrec), 'action': 'confirm'})

    #check if ticket targets (bibref for pid) are already in ticket
    for t in list(tempticket):
        for e in list(ticket):
            if e['pid'] == t['pid'] and e['bibref'] == t['bibref']:
                ticket.remove(e)
        ticket.append(t)
    session.save()
    return pid


def external_user_can_perform_action(uid):
    '''
    Check for SSO user and if external claims will affect the decision
    whether or not the user may use the Invenio claiming platform

    @param uid: the user ID to check permissions for
    @type uid: int

    @return: is user allowed to perform actions?
    @rtype: boolean
    '''
    #If no EXTERNAL_CLAIMED_RECORDS_KEY we bypass this check
    if not bconfig.EXTERNAL_CLAIMED_RECORDS_KEY:
        return True

    uinfo = collect_user_info(uid)
    keys = []
    for k in bconfig.EXTERNAL_CLAIMED_RECORDS_KEY:
        if k in uinfo:
            keys.append(k)

    full_key = False
    for k in keys:
        if uinfo[k]:
            full_key = True
            break

    return full_key


def is_external_user(uid):
    '''
    Check for SSO user and if external claims will affect the decision
    whether or not the user may use the Invenio claiming platform

    @param uid: the user ID to check permissions for
    @type uid: int

    @return: is user allowed to perform actions?
    @rtype: boolean
    '''
    #If no EXTERNAL_CLAIMED_RECORDS_KEY we bypass this check
    if not bconfig.EXTERNAL_CLAIMED_RECORDS_KEY:
        return False

    uinfo = collect_user_info(uid)
    keys = []
    for k in bconfig.EXTERNAL_CLAIMED_RECORDS_KEY:
        if k in uinfo:
            keys.append(k)

    full_key = False
    for k in keys:
        if uinfo[k]:
            full_key = True
            break

    return full_key


def check_transaction_permissions(uid, bibref, pid, action):
    '''
    Check if the user can perform the given action on the given
    pid,bibrefrec pair. Returns one of: granted, denied,
    warning_granted, warning_denied

    @param uid: The internal ID of a user
    @type uid: int
    @param bibref: the bibref pair to check permissions for
    @type bibref: string
    @param pid: the Person ID to check on
    @type pid: int
    @param action: the action that is to be performed
    @type action: string

    @return: granted, denied, warning_granted xor warning_denied
    @rtype: string
    '''
    c_own = True
    c_override = False
    is_superadmin = isUserSuperAdmin({'uid': uid})

    access_right = _resolve_maximum_acces_rights(uid)
    bibref_status = dbapi.get_bibref_modification_status(bibref)
    old_flag = bibref_status[0]

    if old_flag == 2 or old_flag == -2:
        if action in ['confirm', 'assign']:
            new_flag = 2
        elif action in ['repeal']:
            new_flag = -2
        elif action in ['reset']:
            new_flag = 0
        if old_flag != new_flag:
            c_override = True

    uid_pid = dbapi.get_personid_from_uid([[uid]])
    if not uid_pid[1] or pid != uid_pid[0][0]:
        c_own = False

    #if we cannot override an already touched bibref, no need to go on checking
    if c_override:
        if is_superadmin:
            return 'warning_granted'
        if access_right[1] < bibref_status[1]:
            return "warning_denied"
    else:
        if is_superadmin:
            return 'granted'

    #let's check if invenio is allowing us the action we want to perform
    if c_own:
        action = bconfig.CLAIMPAPER_CLAIM_OWN_PAPERS
    else:
        action = bconfig.CLAIMPAPER_CLAIM_OTHERS_PAPERS
    auth = acc_authorize_action(uid, action)
    if auth[0] != 0:
        return "denied"

    #now we know if claiming for ourselves, we can ask for external ideas
    if c_own:
        action = 'claim_own_paper'
    else:
        action = 'claim_other_paper'

    ext_permission = external_user_can_perform_action(uid)

    #if we are here invenio is allowing the thing and we are not overwriting a
    #user with higher privileges, if externals are ok we go on!
    if ext_permission:
        if not c_override:
            return "granted"
        else:
            return "warning_granted"

    return "denied"


def delete_request_ticket(pid, ticket):
    '''
    Delete a request ticket associated to a person
    @param pid: pid (int)
    @param ticket: ticket id (int)
    '''
    dbapi.delete_request_ticket(pid, ticket)


def delete_transaction_from_request_ticket(pid, tid, action, bibref):
    '''
    Deletes a transaction from a ticket. If the ticket is empty, deletes it.
    @param pid: pid
    @param tid: ticket id
    @param action: action
    @param bibref: bibref
    '''
    rt = get_person_request_ticket(pid, tid)
    if len(rt) > 0:
        # rt_num = rt[0][1]
        rt = rt[0][0]
    else:
        return

    for t in list(rt):
        if str(t[0]) == str(action) and str(t[1]) == str(bibref):
            rt.remove(t)

    action_present = False
    for t in rt:
        if str(t[0]) in ['confirm', 'repeal']:
            action_present = True

    if not action_present:
        delete_request_ticket(pid, tid)
        return

    dbapi.update_request_ticket(pid, rt, tid)


def create_request_ticket(userinfo, ticket):
    '''
    Creates a request ticket
    @param userinfo: dictionary of info about user
    @param ticket: dictionary ticket
    '''
    # write ticket to DB
    # send eMail to RT
    udata = []
    mailcontent = []
    m = mailcontent.append
    m("A user sent a change request through the web interface.")
    m("User Information:")

    for k, v in userinfo.iteritems():
        if v:
            m("    %s: %s" % (k, v))

    m("\nLinks to all issued Person-based requests:\n")

    for i in userinfo:
        udata.append([i, userinfo[i]])

    tic = {}
    for t in ticket:
        if not t['action'] in ['confirm', 'assign', 'repeal', 'reset']:
            return False
        elif t['pid'] < 0:
            return False
        elif not is_valid_bibref(t['bibref']):
            return False
        if t['action'] == 'reset':
            #we ignore reset tickets
            continue
        else:
            if t['pid'] not in tic:
                tic[t['pid']] = []
        if t['action'] == 'assign':
            t['action'] = 'confirm'
        tic[t['pid']].append([t['action'], t['bibref']])

    for pid in tic:
        data = []
        for i in udata:
            data.append(i)
        data.append(['date', ctime()])
        for i in tic[pid]:
            data.append(i)
        dbapi.update_request_ticket(pid, data)
        pidlink = get_person_redirect_link(pid)
        m("%s/person/%s?open_claim=True#tabTickets" % (CFG_SITE_URL, pidlink))

    m("\nPlease remember that you have to be logged in "
      "in order to see the ticket of a person.\n")

    if ticket and tic and mailcontent:
        sender = CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL
        if bconfig.TICKET_SENDING_FROM_USER_EMAIL and userinfo['email']:
            sender = userinfo['email']
        send_email(sender,
                   CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL,
                   subject="[Author] Change Request",
                   content="\n".join(mailcontent))

    return True


def send_user_commit_notification_email(userinfo, ticket):
    '''
    Sends commit notification email to RT system
    '''
    # send eMail to RT
    mailcontent = []
    m = mailcontent.append
    m("A user committed a change through the web interface.")
    m("User Information:")
    for k, v in userinfo.iteritems():
        if v:
            m("    %s: %s" % (k, v))

    m("\nChanges:\n")
    for t in ticket:
        m(" --- --- \n")
        for k, v in t.iteritems():
            m("    %s: %s \n" % (str(k), str(v)))
            if k == 'bibref':
                try:
                    br = int(v.split(',')[1])
                    m("        Title: %s\n" % search_engine.get_fieldvalues(br, "245__a"))
                except (TypeError, ValueError, IndexError):
                    pass
        m(" --- --- \n")

    if ticket and mailcontent:
        sender = CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL
        send_email(sender,
                   CFG_BIBAUTHORID_AUTHOR_TICKET_ADMIN_EMAIL,
                   subject="[Author] NO ACTIONS NEEDED. Changes performed by SSO user.",
                   content="\n".join(mailcontent))

    return True


def user_can_view_CMP(uid):
    action = bconfig.CLAIMPAPER_VIEW_PID_UNIVERSE
    auth = acc_authorize_action(uid, action)
    if auth[0] == 0:
        return True
    else:
        return False
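# Illustrative usage sketch (hypothetical values, assuming the functions
# above): check_transaction_permissions() only classifies the request, and
# callers are expected to gate the actual modification on the returned string.
#
#     decision = check_transaction_permissions(uid=42, bibref='100:12,3456',
#                                              pid=7, action='confirm')
#     if decision in ('granted', 'warning_granted'):
#         execute_action('confirm', 7, '100:12,3456', 42)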
def _resolve_maximum_acces_rights(uid):
    '''
    Returns [max_role, lcul] to use in execute_action and
    check_transaction_permissions. Defaults to ['guest', 0] if the user
    has no roles assigned. Always returns the maximum privilege.
    '''
    roles = {bconfig.CLAIMPAPER_ADMIN_ROLE: acc_get_role_id(bconfig.CLAIMPAPER_ADMIN_ROLE),
             bconfig.CLAIMPAPER_USER_ROLE: acc_get_role_id(bconfig.CLAIMPAPER_USER_ROLE)}
    uroles = acc_get_user_roles(uid)

    max_role = ['guest', 0]

    for r in roles:
        if roles[r] in uroles:
            rright = bconfig.CMPROLESLCUL[r]
            if rright >= max_role[1]:
                max_role = [r, rright]

    return max_role


def create_new_person(uid, uid_is_owner=False):
    '''
    Create a new person.

    @param uid: User ID to attach to the person
    @type uid: int
    @param uid_is_owner: Is the uid provided owner of the new person?
    @type uid_is_owner: bool

    @return: the resulting person ID of the new person
    @rtype: int
    '''
    pid = dbapi.create_new_person(uid, uid_is_owner=uid_is_owner)
    return pid


def execute_action(action, pid, bibref, uid, userinfo='', comment=''):
    '''
    Executes the action, setting the last user right according to uid

    @param action: the action to perform
    @type action: string
    @param pid: the Person ID to perform the action on
    @type pid: int
    @param bibref: the bibref pair to perform the action for
    @type bibref: string
    @param uid: the internal user ID of the currently logged in user
    @type uid: int

    @return: success of the process
    @rtype: boolean
    '''
    pid = wash_integer_id(pid)

    if not action in ['confirm', 'assign', 'repeal', 'reset']:
        return False
    elif pid < 0:
        return False
    elif pid == -3:
        pid = dbapi.create_new_person(uid, uid_is_owner=False)
    elif not is_valid_bibref(bibref):
        return False

+    if userinfo.count('||'):
+        uid = userinfo.split('||')[0]
+    else:
+        uid = ''
+
    user_level = _resolve_maximum_acces_rights(uid)[1]

    if action in ['confirm', 'assign']:
-        dbapi.insert_user_log(userinfo, pid, 'assign', 'CMPUI_ticketcommit', bibref, comment)
+        dbapi.insert_user_log(userinfo, pid, 'assign', 'CMPUI_ticketcommit', bibref, comment, userid=uid)
        dbapi.confirm_papers_to_person((str(pid),), [[bibref]], user_level)
    elif action in ['repeal']:
-        dbapi.insert_user_log(userinfo, pid, 'repeal', 'CMPUI_ticketcommit', bibref, comment)
-        dbapi.reject_papers_from_person(pid, [bibref], user_level)
+        dbapi.insert_user_log(userinfo, pid, 'repeal', 'CMPUI_ticketcommit', bibref, comment, userid=uid)
+        dbapi.reject_papers_from_person((str(pid),), [[bibref]], user_level)
    elif action in ['reset']:
-        dbapi.insert_user_log(userinfo, pid, 'reset', 'CMPUI_ticketcommit', bibref, comment)
+        dbapi.insert_user_log(userinfo, pid, 'reset', 'CMPUI_ticketcommit', bibref, comment, userid=uid)
        dbapi.reset_papers_flag((str(pid),), [[bibref]])
    else:
        return False

    #This is the only point which modifies a person, so this can trigger the
    #deletion of a cached page
    dbapi.delete_cached_author_page(pid)

    return True


def sign_assertion(robotname, assertion):
    '''
    Sign an assertion for the export of IDs

    @param robotname: name of the robot. E.g. 'arxivz'
    @type robotname: string
    @param assertion: JSONized object to sign
    @type assertion: string

    @return: The signature
    @rtype: string
    '''
    robotname = ""
    secr = ""

    if not robotname:
        return ""

    robot = ExternalAuthRobot()
    keys = load_robot_keys()

    try:
        secr = keys["Robot"][robotname]
    except:
        secr = ""

    return robot.sign(secr, assertion)
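# Hypothetical illustration of the userinfo convention assumed by the hunk in
# execute_action() above (values invented): the web layer is expected to pass
# userinfo as 'uid||nickname', so the acting uid can be recovered for logging.
#
#     userinfo = '42||jdoe'
#     uid = userinfo.split('||')[0] if userinfo.count('||') else ''
#     # uid == '42'; its privilege level then comes from
#     # _resolve_maximum_acces_rights(uid)[1]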
diff --git a/modules/bibauthorid/lib/bibauthorid_wedge.py b/modules/bibauthorid/lib/bibauthorid_wedge.py
index 328d309d5..8197a987d 100644
--- a/modules/bibauthorid/lib/bibauthorid_wedge.py
+++ b/modules/bibauthorid/lib/bibauthorid_wedge.py
@@ -1,344 +1,327 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

import bibauthorid_config as bconfig
from itertools import izip, starmap
from operator import mul
+from bibauthorid_backinterface import Bib_matrix
from bibauthorid_general_utils import update_status \
                                    , update_status_final \
                                    , bibauthor_print \
                                    , wedge_print
-from bibauthorid_prob_matrix import probability_matrix
+from bibauthorid_prob_matrix import ProbabilityMatrix
import numpy

eps = 0.001
# The lower bound of the edges being processed by the wedge algorithm.
edge_cut_prob = bconfig.WEDGE_THRESHOLD / 2

-special_items = ((None, -3.), ('+', -2.), ('-', -1.))
-special_symbols = dict((x[0], x[1]) for x in special_items)
-special_numbers = dict((x[1], x[0]) for x in special_items)
-
def wedge(cluster_set):
-    if bconfig.DEBUG_CHECKS:
-        assert cluster_set._debug_test_hate_relation()
-        assert cluster_set._debug_duplicated_recs()
-
-#    import guppy; hp = guppy.hpy(); import pdb; pdb.set_trace();
-    matr = probability_matrix(cluster_set, True, True)
+    matr = ProbabilityMatrix()
+    matr.load(cluster_set.last_name)

-    new2old = convert_cluster_set(cluster_set, matr)
+    convert_cluster_set(cluster_set, matr)
    del matr # be sure that this is the last reference!

-    do_wedge(cluster_set, new2old)
+    do_wedge(cluster_set)

-    restore_cluster_set(cluster_set, new2old)
+    restore_cluster_set(cluster_set)

    if bconfig.DEBUG_CHECKS:
        assert cluster_set._debug_test_hate_relation()
        assert cluster_set._debug_duplicated_recs()

-def do_wedge(cluster_set, mapping, deep_debug = False):
+def do_wedge(cluster_set, deep_debug=False):
    '''
    Rearranges the cluster_set according to the values in the
    probability_matrix. The deep debug option will produce a lot of output.
    Avoid using it with more than 20 bibs in the cluster set.
    '''

    def decide(cl1, cl2):
        score1 = compare_to(cl1, cl2)
        score2 = compare_to(cl2, cl1)

        return compare_to_final_bounds(score1, score2)

    def compare_to(cl1, cl2):
        pointers = [cl1.out_edges[v] for v in cl2.bibs]

        assert pointers

        vals, probs = zip(*pointers)

        avg = sum(vals) / len(vals)
        if avg > eps:
            nvals = ((val / avg) ** prob for val, prob in pointers)
        else:
            return 0

        coeff = gini(nvals)

        weight = sum(starmap(mul, pointers)) / sum(probs)

        wedge_print("Wedge: Decide: vals = %s, probs = %s" % (str(vals), str(probs)))
        wedge_print("Wedge: Decide: coeff = %f, weight = %f" % (coeff, weight))

        return coeff * weight

    def gini(arr):
        arr = sorted(arr, reverse=True)
        dividend = sum(starmap(mul, izip(arr, xrange(1, 2 * len(arr), 2))))
        divisor = len(arr) * sum(arr)
        return float(dividend) / divisor

    def compare_to_final_bounds(score1, score2):
        return score1 + score2 > bconfig.WEDGE_THRESHOLD

    def edge_sorting(edge):
        '''
        probability + certainty / 10
        '''
        return edge[2][0] + edge[2][1] / 10.
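    # Rough numeric illustration of the nested gini() above (made-up input,
    # not executed by the algorithm): for arr = [3.0, 1.0], sorted descending,
    # dividend = 3*1 + 1*3 = 6 and divisor = 2 * (3 + 1) = 8, so gini()
    # returns 0.75; a perfectly uniform vector such as [1.0, 1.0] returns
    # exactly 1.0.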
-    if bconfig.DEBUG_CHECKS:
-        assert cluster_set._debug_test_hate_relation()
-        assert cluster_set._debug_duplicated_recs(mapping)
-
    bib_map = create_bib_2_cluster_dict(cluster_set)

    plus_edges, minus_edges, edges = group_edges(cluster_set)

    for i, (bib1, bib2) in enumerate(plus_edges):
        update_status(float(i) / len(plus_edges), "Agglomerating obvious clusters...")
        cl1 = bib_map[bib1]
        cl2 = bib_map[bib2]
        if cl1 != cl2 and not cl1.hates(cl2):
            join(cl1, cl2)
            cluster_set.clusters.remove(cl2)
            for v in cl2.bibs:
                bib_map[v] = cl1
-            if bconfig.DEBUG_CHECKS:
-                assert cluster_set._debug_test_hate_relation()
-                assert cluster_set._debug_duplicated_recs(mapping)
    update_status_final("Agglomerating obvious clusters done.")

    for i, (bib1, bib2) in enumerate(minus_edges):
        update_status(float(i) / len(minus_edges), "Dividing obvious clusters...")
        cl1 = bib_map[bib1]
        cl2 = bib_map[bib2]
        if cl1 != cl2 and not cl1.hates(cl2):
            cl1.quarrel(cl2)
    update_status_final("Dividing obvious clusters done.")

    bibauthor_print("Sorting the value edges.")
    edges = sorted(edges, key=edge_sorting, reverse=True)

    interval = 1000
    wedge_print("Wedge: New wedge, %d edges." % len(edges))
    for current, (v1, v2, unused) in enumerate(edges):
        if (current % interval) == 0:
            update_status(float(current) / len(edges), "Wedge...")

        assert unused != '+' and unused != '-'
-        if bconfig.DEBUG_CHECKS:
-            assert cluster_set._debug_test_hate_relation()
-            assert cluster_set._debug_duplicated_recs(mapping)
-
        wedge_print("Wedge: popped new edge: Verts = %s, %s Value = (%f, %f)" % (v1, v2, unused[0], unused[1]))
        cl1 = bib_map[v1]
        cl2 = bib_map[v2]
        if cl1 != cl2 and not cl1.hates(cl2):
            if deep_debug:
-                export_to_dot(cluster_set, "/tmp/%s%d.dot" % (cluster_set.last_name, current), mapping, (v1, v2, unused))
+                export_to_dot(cluster_set, "/tmp/%s%d.dot" % (cluster_set.last_name, current), cluster_set.mapping, (v1, v2, unused))

            if decide(cl1, cl2):
                wedge_print("Wedge: Joined!")
                join(cl1, cl2)
                cluster_set.clusters.remove(cl2)
                for v in cl2.bibs:
                    bib_map[v] = cl1
            else:
                wedge_print("Wedge: Quarreled!")
                cl1.quarrel(cl2)
        elif cl1 == cl2:
            wedge_print("Wedge: Clusters already joined!")
        else:
            wedge_print("Wedge: Clusters hate each other!")

    update_status_final("Wedge done.")
    bibauthor_print("")

    if deep_debug:
-        export_to_dot(cluster_set, "/tmp/%sfinal.dot" % cluster_set.last_name, mapping)
+        export_to_dot(cluster_set, "/tmp/%sfinal.dot" % cluster_set.last_name, cluster_set.mapping)


def meld_edges(p1, p2):
    '''
    Creates one out_edges set from two.
    The operation is associative and commutative.
    The objects are: (out_edges for a cluster, number of vertices in
    the same cluster)
    '''
    out_edges1, verts1 = p1
    out_edges2, verts2 = p2

    def median(e1, e2):
-        if e1[0] in special_numbers:
+        if e1[0] in Bib_matrix.special_numbers:
            return e1
-        if e2[0] in special_numbers:
+        if e2[0] in Bib_matrix.special_numbers:
            return e2

        inter_cert = e1[1] * verts1 + e2[1] * verts2
        inter_prob = e1[0] * e1[1] * verts1 + e2[0] * e2[1] * verts2
        return (inter_prob / inter_cert, inter_cert / (verts1 + verts2))

    assert len(out_edges1) == len(out_edges2)
    size = len(out_edges1)
    result = numpy.ndarray(shape=(size, 2), dtype=float, order='C')
    for i in xrange(size):
        result[i] = median(out_edges1[i], out_edges2[i])

    return (result, verts1 + verts2)
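# Hypothetical numeric walk-through of meld_edges() (values invented): melding
# a 1-vertex cluster whose edge is (0.8, 1.0) with a 3-vertex cluster whose
# matching edge is (0.4, 1.0) gives inter_cert = 1*1 + 1*3 = 4 and
# inter_prob = 0.8*1 + 0.4*3 = 2.0, i.e. the melded edge is
# (2.0 / 4, 4 / 4) = (0.5, 1.0) -- a vertex-weighted average of the two.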
def convert_cluster_set(cs, prob_matr):
    '''
    Converts a normal cluster set to a wedge cluster set.
    @param cs: a cluster set to be converted
    @type cs: cluster set
    @return: a mapping from a number to a bibrefrec.
    '''

    # step 1:
    #    + Assign a number to each bibrefrec.
    #    + Replace the arrays of bibrefrecs with arrays of numbers.
    #    + Store the result and prepare it to be returned.
    result_mapping = []
    for clus in cs.clusters:
        start = len(result_mapping)
        result_mapping += list(clus.bibs)
        end = len(result_mapping)
        clus.bibs = range(start, end)

    assert len(result_mapping) == len(set(result_mapping))
+    assert len(result_mapping) == cs.num_all_bibs
+
+    cs.new2old = result_mapping

    # step 2:
    #    + Using the prob matrix, create a vector of values to all other bibs.
    #    + Meld those vectors into one for each cluster.
    for current, c1 in enumerate(cs.clusters):
        update_status(float(current) / len(cs.clusters), "Converting the cluster set...")

        assert len(c1.bibs) > 0
        pointers = []

        for v1 in c1.bibs:
            pointer = numpy.ndarray(shape=(len(result_mapping), 2), dtype=float, order='C')
-            pointer.fill(special_symbols[None])
+            pointer.fill(Bib_matrix.special_symbols[None])
            for c2 in cs.clusters:
                if c1 != c2 and not c1.hates(c2):
                    for v2 in c2.bibs:
                        val = prob_matr[result_mapping[v1], result_mapping[v2]]
-                        if val in special_symbols:
-                            numb = special_symbols[val]
+                        if val in Bib_matrix.special_symbols:
+                            numb = Bib_matrix.special_symbols[val]
                            val = (numb, numb)
                        assert len(val) == 2
                        pointer[v2] = val
            pointers.append((pointer, 1))
        c1.out_edges = reduce(meld_edges, pointers)[0]

    update_status_final("Converting the cluster set done.")

-    return result_mapping
-
-def restore_cluster_set(cs, new2old):
+def restore_cluster_set(cs):
    for cl in cs.clusters:
-        cl.bibs = set(new2old[b] for b in cl.bibs)
+        cl.bibs = set(cs.new2old[b] for b in cl.bibs)
        del cl.out_edges
+    cs.update_bibs()


def create_bib_2_cluster_dict(cs):
    '''
    Creates and returns a dictionary bibrefrec -> cluster.
    The cluster set must be converted!
    '''
    size = sum(len(cl.bibs) for cl in cs.clusters)
    ret = range(size)
    for cl in cs.clusters:
        for bib in cl.bibs:
            ret[bib] = cl
    return ret


def group_edges(cs):
    plus = []
    minus = []
    pairs = []

    for current, cl1 in enumerate(cs.clusters):
        update_status(float(current) / len(cs.clusters), "Grouping all edges...")

        bib1 = tuple(cl1.bibs)[0]
        pointers = cl1.out_edges
        for bib2 in xrange(len(cl1.out_edges)):
            val = pointers[bib2]
-            if val[0] not in special_numbers:
+            if val[0] not in Bib_matrix.special_numbers:
                if val[0] > edge_cut_prob:
                    pairs.append((bib1, bib2, val))
-            elif val[0] == special_symbols['+']:
+            elif val[0] == Bib_matrix.special_symbols['+']:
                plus.append((bib1, bib2))
-            elif val[0] == special_symbols['-']:
+            elif val[0] == Bib_matrix.special_symbols['-']:
                minus.append((bib1, bib2))
            else:
-                assert val[0] == special_symbols[None]
+                assert val[0] == Bib_matrix.special_symbols[None]

    update_status_final("Finished with the edge grouping.")

    bibauthor_print("Positive edges: %d, Negative edges: %d, Value edges: %d." %
                    (len(plus), len(minus), len(pairs)))
    return plus, minus, pairs
def join(cl1, cl2):
    '''
    Joins two clusters of a cluster set into the first one.
    '''
    cl1.out_edges = meld_edges((cl1.out_edges, len(cl1.bibs)),
                               (cl2.out_edges, len(cl2.bibs)))[0]
    cl1.bibs += cl2.bibs

    assert not cl1.hates(cl1)
    assert not cl2.hates(cl2)

    cl1.hate |= cl2.hate
    for cl in cl2.hate:
        cl.hate.remove(cl2)
        cl.hate.add(cl1)


def export_to_dot(cs, fname, graph_info, extra_edge=None):
    from bibauthorid_dbinterface import get_name_by_bibrecref

    fptr = open(fname, "w")
    fptr.write("graph wedgy {\n")
    fptr.write("    overlap=prism\n")

    for idx, bib in enumerate(graph_info):
        fptr.write('    %d [color=black label="%s"];\n' % (idx, get_name_by_bibrecref(bib)))

    if extra_edge:
        v1, v2, (prob, cert) = extra_edge
        fptr.write('    %d -- %d [color=green label="p: %.2f, c: %.2f"];\n' % (v1, v2, prob, cert))

    for clus in cs.clusters:
        fptr.write("    %s [color=blue];\n" % " -- ".join(str(x) for x in clus.bibs))
        fptr.write("".join("    %d -- %d [color=red]\n" %
                   (b1, b2) for b1 in clus.bibs for h in clus.hate for b2 in h.bibs))

    fptr.write("}")
-
-
diff --git a/modules/bibcirculation/lib/bibcirculation_utils.py b/modules/bibcirculation/lib/bibcirculation_utils.py
index 9c7ccdf38..4baedf7b7 100644
--- a/modules/bibcirculation/lib/bibcirculation_utils.py
+++ b/modules/bibcirculation/lib/bibcirculation_utils.py
@@ -1,948 +1,957 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
-## Copyright (C) 2008, 2009, 2010, 2011 CERN.
+## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibCirculation Utils: Auxiliary methods of BibCirculation """ __revision__ = "$Id$" import datetime import random import re import time from invenio.search_engine_utils import get_fieldvalues import invenio.bibcirculation_dblayer as db from invenio.search_engine import get_field_tags from invenio.bibtask import task_low_level_submission from invenio.textutils import encode_for_xml from invenio.config import CFG_SITE_URL, CFG_TMPDIR, CFG_SITE_LANG from invenio.bibcirculation_config import \ CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY, \ CFG_BIBCIRCULATION_WORKING_DAYS, \ CFG_BIBCIRCULATION_HOLIDAYS, \ CFG_CERN_SITE, \ CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, \ CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, \ CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS, \ CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING, \ CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING, \ CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, \ CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED, \ CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED +from invenio.urlutils import create_html_link, make_invenio_opener +from invenio.config import CFG_SITE_URL, CFG_TMPDIR +from invenio.bibcirculation_config import CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY, \ + CFG_BIBCIRCULATION_WORKING_DAYS, \ + CFG_BIBCIRCULATION_HOLIDAYS +from invenio.messages import gettext_set_language + +import datetime, time, re + +BIBCIRCULATION_OPENER = make_invenio_opener('BibCirculation') DICC_REGEXP = re.compile("^\{('[^']*': ?('[^']*'|\"[^\"]+\"|[0-9]*|None)(, ?'[^']*': ?('[^']*'|\"[^\"]+\"|[0-9]*|None))*)?\}$") from invenio.messages import gettext_set_language def search_user(column, string): if string is not None: string = string.strip() if CFG_CERN_SITE == 1: if column == 'name': result = db.search_borrower_by_name(string) else: if column == 'email': try: result = db.search_borrower_by_email(string) except: result = () else: try: result = db.search_borrower_by_ccid(string) except: result = () if result == (): from invenio.bibcirculation_cern_ldap \ import get_user_info_from_ldap ldap_info = 'busy' while ldap_info == 'busy': time.sleep(1) if column == 'id' or column == 'ccid': ldap_info = get_user_info_from_ldap(ccid=string) elif column == 'email': ldap_info = get_user_info_from_ldap(email=string) else: ldap_info = get_user_info_from_ldap(nickname=string) if len(ldap_info) == 0: result = () else: try: name = ldap_info['displayName'][0] except KeyError: name = "" try: email = ldap_info['mail'][0] except KeyError: email = "" try: phone = ldap_info['telephoneNumber'][0] except KeyError: phone = "" try: address = ldap_info['physicalDeliveryOfficeName'][0] except KeyError: address = "" try: mailbox = ldap_info['postOfficeBox'][0] except KeyError: mailbox = "" try: ccid = ldap_info['employeeID'][0] except KeyError: ccid = "" try: db.new_borrower(ccid, name, email, phone, address, mailbox, '') except: pass result = db.search_borrower_by_ccid(int(ccid)) else: if column == 'name': result = db.search_borrower_by_name(string) elif column == 'email': result = db.search_borrower_by_email(string) else: result = db.search_borrower_by_id(string) return result def update_user_info_from_ldap(user_id): from invenio.bibcirculation_cern_ldap import get_user_info_from_ldap ccid = db.get_borrower_ccid(user_id) ldap_info = 'busy' while ldap_info == 'busy': time.sleep(1) ldap_info = get_user_info_from_ldap(ccid=ccid) if len(ldap_info) == 0: result = () else: try: name = ldap_info['displayName'][0] except KeyError: name = "" try: email = ldap_info['mail'][0] except KeyError: email = "" try: phone = ldap_info['telephoneNumber'][0] except 
def update_user_info_from_ldap(user_id):

    from invenio.bibcirculation_cern_ldap import get_user_info_from_ldap

    ccid = db.get_borrower_ccid(user_id)
    ldap_info = 'busy'

    while ldap_info == 'busy':
        time.sleep(1)
        ldap_info = get_user_info_from_ldap(ccid=ccid)

    if len(ldap_info) == 0:
        result = ()
    else:
        try:
            name = ldap_info['displayName'][0]
        except KeyError:
            name = ""
        try:
            email = ldap_info['mail'][0]
        except KeyError:
            email = ""
        try:
            phone = ldap_info['telephoneNumber'][0]
        except KeyError:
            phone = ""
        try:
            address = ldap_info['physicalDeliveryOfficeName'][0]
        except KeyError:
            address = ""
        try:
            mailbox = ldap_info['postOfficeBox'][0]
        except KeyError:
            mailbox = ""
        db.update_borrower(user_id, name, email, phone, address, mailbox)
        result = db.search_borrower_by_ccid(int(ccid))

    return result

def get_book_cover(isbn):
    """
    Retrieve book cover using Amazon web services.

    @param isbn: book's isbn
    @type isbn: string

    @return book cover
    """
    from xml.dom import minidom
-    import urllib

    # connect to AWS
-    cover_xml = urllib.urlopen('http://ecs.amazonaws.com/onca/xml' \
+    cover_xml = BIBCIRCULATION_OPENER.open('http://ecs.amazonaws.com/onca/xml' \
                     '?Service=AWSECommerceService&AWSAccessKeyId=' \
                     + CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY + \
                     '&Operation=ItemSearch&Condition=All&' \
                     'ResponseGroup=Images&SearchIndex=Books&' \
                     'Keywords=' + isbn)

    # parse XML
    try:
        xml_img = minidom.parse(cover_xml)
        retrieve_book_cover = xml_img.getElementsByTagName('MediumImage')
        book_cover = retrieve_book_cover.item(0).firstChild.firstChild.data
    except:
        book_cover = "%s/img/book_cover_placeholder.gif" % (CFG_SITE_URL)

    return book_cover

def book_information_from_MARC(recid):
    """
    Retrieve book's information from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return tuple with title, year, author, isbn and editor.
    """
    # FIXME do the same that book_title_from_MARC

    book_title = book_title_from_MARC(recid)

    book_year = ''.join(get_fieldvalues(recid, "260__c"))

    author_tags = ['100__a', '700__a', '721__a']
    book_author = ''

    for tag in author_tags:
        l = get_fieldvalues(recid, tag)
        for c in l:
            book_author += c + '; '

    book_author = book_author[:-2]

    l = get_fieldvalues(recid, "020__a")
    book_isbn = ''
    for isbn in l:
        book_isbn += isbn + ', '

    book_isbn = book_isbn[:-2]

    book_editor = ', '.join(get_fieldvalues(recid, "260__a") + \
                            get_fieldvalues(recid, "260__b"))

    return (book_title, book_year, book_author, book_isbn, book_editor)

def book_title_from_MARC(recid):
    """
    Retrieve book's title from MARC

    @param recid: identify the record. Primary key of bibrec.
    @type recid: int

    @return book's title
    """
    title_tags = get_field_tags('title')

    book_title = ''
    i = 0
    while book_title == '' and i < len(title_tags):
        l = get_fieldvalues(recid, title_tags[i])
        for candidate in l:
            book_title = book_title + candidate + ': '
        i += 1

    book_title = book_title[:-2]

    return book_title

def update_status_if_expired(loan_id):
    """
    Update the loan's status if status is 'expired'.

    @param loan_id: identify the loan. Primary key of crcLOAN.
    @type loan_id: int
    """
    loan_status = db.get_loan_status(loan_id)

    if loan_status == CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED:
        db.update_loan_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, loan_id)

    return

def get_next_day(date_string):
    """
    Get the next day

    @param date_string: date
    @type date_string: string

    @return next day
    """
    # add 1 day
    more_1_day = datetime.timedelta(days=1)

    # convert date_string to datetime format
    tmp_date = time.strptime(date_string, '%Y-%m-%d')

    # calculate the new date (next day)
    next_day = datetime.datetime(tmp_date[0], tmp_date[1], tmp_date[2]) \
               + more_1_day

    return next_day
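# Quick doctest-style check of get_next_day() (illustrative only):
#
#     >>> get_next_day('2012-02-28')
#     datetime.datetime(2012, 2, 29, 0, 0)
#
# The string is parsed with time.strptime and one timedelta day is added, so
# month boundaries and leap days are handled by the datetime module.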
def generate_new_due_date(days):
    """
    Generate a new due date (today + X days = new due date).

    @param days: number of days
    @type days: integer

    @return new due date
    """
    today = datetime.date.today()

    more_X_days = datetime.timedelta(days=days)

    tmp_date = today + more_X_days

    week_day = tmp_date.strftime('%A')
    due_date = tmp_date.strftime('%Y-%m-%d')

    due_date_validated = False

    while not due_date_validated:
        if week_day in CFG_BIBCIRCULATION_WORKING_DAYS \
           and due_date not in CFG_BIBCIRCULATION_HOLIDAYS:
            due_date_validated = True
        else:
            next_day = get_next_day(due_date)
            due_date = next_day.strftime('%Y-%m-%d')
            week_day = next_day.strftime('%A')

    return due_date

def renew_loan_for_X_days(barcode):
    """
    Renew a loan based on its loan period

    @param barcode: identify the item. Primary key of crcITEM.
    @type barcode: string

    @return new due date
    """
    loan_period = db.get_loan_period(barcode)

    if loan_period == '4 weeks':
        due_date = generate_new_due_date(30)
    else:
        due_date = generate_new_due_date(7)

    return due_date

def make_copy_available(request_id):
    """
    Change the status of a copy to
    CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF when a hold request
    is cancelled.

    @param request_id: identify the request: Primary key of crcLOANREQUEST
    @type request_id: int
    """
    barcode_requested = db.get_requested_barcode(request_id)
    db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
                          barcode_requested)
    update_requests_statuses(barcode_requested)
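# Illustration of the due-date loop in generate_new_due_date() above (dates
# invented): if CFG_BIBCIRCULATION_WORKING_DAYS excludes 'Saturday' and
# 'Sunday', a candidate due date falling on a Saturday is pushed forward one
# day at a time via get_next_day() until it lands on a working, non-holiday
# day, e.g. '2012-06-30' (a Saturday) would become '2012-07-02' (a Monday).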
def print_new_loan_information(req, ln=CFG_SITE_LANG):
    """
    Create a printable format with the information of the last
    loan that has been registered on the table crcLOAN.
    """
    _ = gettext_set_language(ln)

    # get the last loan from crcLOAN
    (recid, borrower_id, due_date) = db.get_last_loan()

    # get book's information
    (book_title, book_year, book_author,
     book_isbn, book_editor) = book_information_from_MARC(recid)

    # get borrower's data/information (name, address, email)
    (borrower_name, borrower_address,
     borrower_mailbox, borrower_email) = db.get_borrower_data(borrower_id)

    # Generate printable format
    req.content_type = "text/html"
    req.send_http_header()

    out = """"""

    out += """
    """ % (CFG_SITE_URL)

    out += """"""

    out += """
    %s
    """ % (_("Loan information"))

    out += """
    %s
    """ % (_("This book has been sent to you:"))

    out += """
    """

    out += """"""

    out += """
    %s %s
    %s %s
    %s %s
    %s %s
    %s %s
    """ % (_("Title"), book_title,
           _("Author"), book_author,
           _("Editor"), book_editor,
           _("ISBN"), book_isbn,
           _("Year"), book_year)

    out += """"""

    out += """
    %s %s
    %s %s
    %s %s
    %s %s
    """ % (_("Name"), borrower_name,
           _("Mailbox"), borrower_mailbox,
           _("Address"), borrower_address,
           _("Email"), borrower_email)

    out += """"""

    out += """
    %s: %s
    """ % (_("Due date"), due_date)

    out += """
    """

    req.write("")
    req.write(out)
    req.write("")

    return "\n"


def print_pending_hold_requests_information(req, ln):
    """
    Create a printable format with all the information about all
    pending hold requests.
    """
    _ = gettext_set_language(ln)

    requests = db.get_pdf_request_data(CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)

    req.content_type = "text/html"
    req.send_http_header()

    out = """"""

    out += """
    """ % (CFG_SITE_URL)

    out += """"""

    out += """
    %s
    """ % (_("List of pending hold requests"))

    out += """
    %s
    """ % (time.ctime())

    out += """"""

    out += """
    %s %s %s %s %s %s %s
    """ % (_("Borrower"), _("Item"), _("Library"), _("Location"),
           _("From"), _("To"), _("Request date"))

    for (recid, borrower_name, library_name, location,
         date_from, date_to, request_date) in requests:

        out += """
        %s %s %s %s %s %s %s
        """ % (borrower_name, book_title_from_MARC(recid), library_name,
               location, date_from, date_to, request_date)

    out += """
""" req.write("") req.write(out) req.write("") return "\n" def get_item_info_for_search_result(recid): """ Get the item's info from MARC in order to create a search result with more details @param recid: identify the record. Primary key of bibrec. @type recid: int @return book's informations (author, editor and number of copies) """ book_author = ' '.join(get_fieldvalues(recid, "100__a") + \ get_fieldvalues(recid, "100__u")) book_editor = ' , '.join(get_fieldvalues(recid, "260__a") + \ get_fieldvalues(recid, "260__b") + \ get_fieldvalues(recid, "260__c")) book_copies = ' '.join(get_fieldvalues(recid, "964__a")) book_infos = (book_author, book_editor, book_copies) return book_infos def update_request_data(request_id): """ Update the status of a given request. @param request_id: identify the request: Primary key of crcLOANREQUEST @type request_id: int """ barcode = db.get_request_barcode(request_id) is_on_loan = db.is_item_on_loan(barcode) if is_on_loan is not None: db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode) else: db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, barcode) update_requests_statuses(barcode) return True def compare_dates(date): """ Compare given date with today @param date: given date @type date: string @return boolean """ if date < time.strftime("%Y-%m-%d"): return False else: return True def validate_date_format(date): """ Verify the date format @param date: given date @type date: string @return boolean """ try: if time.strptime(date, "%Y-%m-%d"): if compare_dates(date): return True else: return False except ValueError: return False def create_ill_record(book_info): """ Create a new ILL record @param book_info: book's information @type book_info: tuple @return MARC record """ (title, author, place, publisher, year, edition, isbn) = book_info ill_record = """ %(isbn)s %(author)s %(title)s %(edition)s %(place)s %(publisher)s %(year)s ILLBOOK """ % {'isbn': encode_for_xml(isbn), 'author': encode_for_xml(author), 'title': encode_for_xml(title), 'edition': encode_for_xml(edition), 'place': encode_for_xml(place), 'publisher': encode_for_xml(publisher), 'year': encode_for_xml(year)} file_path = '%s/%s_%s.xml' % (CFG_TMPDIR, 'bibcirculation_ill_book', time.strftime("%Y%m%d_%H%M%S")) xml_file = open(file_path, 'w') xml_file.write(ill_record) xml_file.close() # Pass XML file to BibUpload. task_low_level_submission('bibupload', 'bibcirculation', '-P', '5', '-i', file_path) return ill_record def wash_recid_from_ILL_request(ill_request_id): """ Get dictionnary and wash recid values. @param ill_request_id: identify the ILL request. Primray key of crcILLREQUEST @type ill_request_id: int @return recid """ book_info = db.get_ill_book_info(ill_request_id) if looks_like_dictionary(book_info): book_info = eval(book_info) else: book_info = None try: recid = int(book_info['recid']) except KeyError: recid = None return recid def all_copies_are_missing(recid): """ Verify if all copies of an item are missing @param recid: identify the record. Primary key of bibrec @type recid: int @return boolean """ copies_status = db.get_copies_status(recid) number_of_missing = 0 if copies_status == None: return True else: for (status) in copies_status: if status == 'missing': number_of_missing += 1 if number_of_missing == len(copies_status): return True else: return False #def has_copies(recid): # """ # Verify if a recid is item (has copies) # # @param recid: identify the record. 
#                  Primary key of bibrec
#    @type recid: int
#
#    @return boolean
#    """
#
#    copies_status = db.get_copies_status(recid)
#
#    if copies_status is None:
#        return False
#    else:
#        if len(copies_status) == 0:
#            return False
#        else:
#            return True


def generate_email_body(template, loan_id):
    """
    Generate the body of an email for loan recalls.

    @param template: email template
    @type template: string

    @param loan_id: identify the loan. Primary key of crcLOAN.
    @type loan_id: int

    @return email(body)
    """
    recid = db.get_loan_recid(loan_id)
    (book_title, book_year, book_author,
     book_isbn, book_editor) = book_information_from_MARC(int(recid))

    out = template % (book_title, book_year, book_author,
                      book_isbn, book_editor)

    return out


def create_item_details_url(recid, ln):
    url = '/admin2/bibcirculation/get_item_details?ln=%s&recid=%s' % (ln,
                                                                str(recid))
    return CFG_SITE_URL + url


def tag_all_requests_as_done(barcode, user_id):
    recid = db.get_recid(barcode)
    list_of_barcodes = db.get_barcodes(recid)
    for bc in list_of_barcodes:
        db.tag_requests_as_done(bc, user_id)


def update_requests_statuses(barcode, recid=None):

    if recid == None:
        recid = db.get_recid(barcode)

    list_of_pending_requests = db.get_requests(recid,
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)

    some_copy_available = False
    copies_status = db.get_copies_status(recid)
    if copies_status is not None:
        for status in copies_status:
            if status in (CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
                          CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS):
                some_copy_available = True

    if len(list_of_pending_requests) == 1:
        if not some_copy_available:
            db.update_loan_request_status(list_of_pending_requests[0][0],
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
        else:
            return list_of_pending_requests[0][0]

    elif len(list_of_pending_requests) == 0:
        if some_copy_available:
            list_of_waiting_requests = db.get_requests(recid,
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
            if len(list_of_waiting_requests) > 0:
                db.update_loan_request_status(list_of_waiting_requests[0][0],
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)
                return list_of_waiting_requests[0][0]

    elif len(list_of_pending_requests) > 1:
        for request in list_of_pending_requests:
            db.update_loan_request_status(request[0],
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
        list_of_waiting_requests = db.get_requests(recid,
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)
        if some_copy_available:
            db.update_loan_request_status(list_of_waiting_requests[0][0],
                                    CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)
            return list_of_waiting_requests[0][0]

    return None


def is_periodical(recid):
    rec_type = get_fieldvalues(recid, "690C_a")
    if len(rec_type) > 0:
        for value in rec_type:
            if value == 'PERI':
                return True

    return False


def has_date_format(date):
    if type(date) is not str:
        return False

    date = date.strip()

    if len(date) != 10:
        return False
    elif date[4] != '-' or date[7] != '-':
        return False
    else:
        year = date[:4]
        month = date[5:7]
        day = date[8:]

        return year.isdigit() and month.isdigit() and day.isdigit()


def generate_tmp_barcode():
    tmp_barcode = 'tmp-' + str(random.random())[-8:]

    while db.barcode_in_use(tmp_barcode):
        tmp_barcode = 'tmp-' + str(random.random())[-8:]

    return tmp_barcode
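# Doctest-style illustration of has_date_format() above (examples invented):
#
#     has_date_format('2012-06-30')  -> True
#     has_date_format('2012/06/30')  -> False  (wrong separators)
#     has_date_format('12-06-30')    -> False  (not 10 characters)
#
# Only the shape is checked; an impossible date such as '2012-99-99' still
# passes here and is only rejected by validate_date_format().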
def check_database():
    from invenio.dbquery import run_sql

    r1 = run_sql(""" SELECT it.barcode, it.status, ln.status
                       FROM crcITEM it, crcLOAN ln
                      WHERE ln.barcode=it.barcode
                        AND it.status=%s
                        AND ln.status!=%s
                        AND ln.status!=%s
                        AND ln.status!=%s
                 """, (CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN,
                       CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                       CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED,
                       CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED))

    r2 = run_sql(""" SELECT it.barcode
                       FROM crcITEM it, crcLOAN ln
                      WHERE ln.barcode=it.barcode
                        AND it.status=%s
                        AND (ln.status=%s or ln.status=%s)
                 """, (CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
                       CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                       CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED))

    r3 = run_sql(""" SELECT l1.barcode, l1.id,
                            DATE_FORMAT(l1.loaned_on,'%%Y-%%m-%%d %%H:%%i:%%s'),
                            DATE_FORMAT(l2.loaned_on,'%%Y-%%m-%%d %%H:%%i:%%s')
                       FROM crcLOAN l1,
                            crcLOAN l2
                      WHERE l1.id!=l2.id
                        AND l1.status!=%s
                        AND l1.status=l2.status
                        AND l1.barcode=l2.barcode
                   ORDER BY l1.loaned_on
                 """, (CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED, ))

    r4 = run_sql(""" SELECT id, id_crcBORROWER, barcode,
                            due_date, number_of_renewals
                       FROM crcLOAN
                      WHERE status=%s
                        AND due_date>NOW()
                 """, (CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED, ))

    return (len(r1), len(r2), len(r3), len(r4))


def looks_like_dictionary(candidate_string):
    if re.match(DICC_REGEXP, candidate_string):
        return True
    else:
        return False
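# Illustration of looks_like_dictionary() above (strings invented): it vets
# only the shape of a candidate string against DICC_REGEXP before the eval()
# call in wash_recid_from_ILL_request().
#
#     looks_like_dictionary("{'recid': 123, 'title': 'Foo'}")  -> True
#     looks_like_dictionary("__import__('os')")                -> False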
## """Invenio Bibcirculation Administrator Interface.""" from __future__ import division """Invenio Bibcirculation Administrator Interface.""" __revision__ = "$Id$" __lastupdated__ = """$Date$""" import datetime, time, types # Others Invenio imports from invenio.config import \ CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, \ CFG_CERN_SITE import invenio.access_control_engine as acce from invenio.webpage import page from invenio.webuser import getUid, page_not_authorized from invenio.webstat import register_customevent from invenio.errorlib import register_exception from invenio.mailutils import send_email from invenio.search_engine import perform_request_search, record_exists from invenio.urlutils import create_html_link, redirect_to_url from invenio.messages import gettext_set_language from invenio.bibcirculation_utils import book_title_from_MARC, \ update_status_if_expired, \ renew_loan_for_X_days, \ print_pending_hold_requests_information, \ print_new_loan_information, \ validate_date_format, \ generate_email_body, \ book_information_from_MARC, \ search_user, \ tag_all_requests_as_done, \ update_user_info_from_ldap, \ update_requests_statuses, \ has_date_format, \ generate_tmp_barcode, \ looks_like_dictionary #is_periodical, \ #create_ill_record, \ #create_item_details_url from invenio.webstat import register_customevent from invenio.errorlib import register_exception # Bibcirculation imports from invenio.bibcirculation_config import \ CFG_BIBCIRCULATION_TEMPLATES, CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY, \ CFG_BIBCIRCULATION_LIBRARIAN_EMAIL, CFG_BIBCIRCULATION_LOANS_EMAIL, \ CFG_BIBCIRCULATION_ACQ_TYPE import invenio.bibcirculation_dblayer as db from invenio.bibcirculation_cern_ldap import get_user_info_from_ldap +from invenio.bibcirculation_utils import looks_like_dictionary, \ + BIBCIRCULATION_OPENER import invenio.template bc_templates = invenio.template.load('bibcirculation') from invenio.config import \ CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, \ CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, \ CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS, \ CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, \ CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED, \ CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING, \ CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING, \ CFG_BIBCIRCULATION_REQUEST_STATUS_DONE, \ CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED, \ CFG_BIBCIRCULATION_ILL_STATUS_NEW, \ CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN, \ CFG_BIBCIRCULATION_ACQ_STATUS_NEW def is_adminuser(req): """check if user is a registered administrator. 
""" return acce.acc_authorize_action(req, "runbibcirculation") def mustloginpage(req, message): """show a page asking the user to login.""" navtrail_previous_links = '' \ 'Admin Area > ' \ '' \ 'BibCirculation Admin ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL) return page_not_authorized(req=req, text=message, navtrail=navtrail_previous_links) def index(req, ln=CFG_SITE_LANG): """main function to show pages for bibcirculationadmin """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_index(ln=ln) return page(title=_("BibCirculation Admin"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def borrower_search(req, empty_barcode, redirect_to_new_request=False, ln=CFG_SITE_LANG): """ Page (for administrator) where is it possible to search for a borrower (who is on crcBORROWER table) using his/her name, email, phone or id. If redirect_to_new_request is False, the returned page will be "Borrower details" If redirect_to_new_request is True, the returned page will be "New Request" """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if empty_barcode: infos.append(empty_barcode) navtrail_previous_links = 'Admin Area' \ ' > ' \ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) body = bc_templates.tmpl_borrower_search(infos=infos, redirect_to_new_request=redirect_to_new_request, ln=ln) if redirect_to_new_request: title = _("New Request") else: title = _("Borrower Search") return page(title=title, uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def borrower_search_result(req, column, string, redirect_to_new_request=False, ln=CFG_SITE_LANG): """ Search a borrower and return a list with all the possible results. @type column: string @param column: identify the column, of the table crcBORROWER, that will be considered during the search. Can be 'name', 'email' or 'id'. @type string: string @param string: string used for the search process. 
    If redirect_to_new_request is True, the returned page will be
    "New Request".
    If redirect_to_new_request is False, the returned page will be
    "Borrower details".
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if string == '':
        message = _('Empty string.') + ' ' + _('Please, try again.')
        return borrower_search(req, message, redirect_to_new_request, ln)
    else:
        result = search_user(column, string)

    navtrail_previous_links = 'Admin Area' \
                              ' > Circulation Management' \
                              ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    if len(result) == 1:
        if redirect_to_new_request:
            return create_new_request_step1(req, result[0][0])
        else:
            return get_borrower_details(req, result[0][0], False, ln)
        #return create_new_request_step1(req, borrower_id, p, f, search, ln)
    else:
        body = bc_templates.tmpl_borrower_search_result(result=result,
                        redirect_to_new_request=redirect_to_new_request,
                        ln=ln)

        return page(title=_("Borrower search result"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)


def item_search(req, infos=[], ln=CFG_SITE_LANG):
    """
    Display a form where it is possible to search for an item.
    """
    navtrail_previous_links = 'Admin Area' \
                              ' > ' \
                              'Circulation Management' \
                              ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    _ = gettext_set_language(ln)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    body = bc_templates.tmpl_item_search(infos=infos, ln=ln)

    return page(title=_("Item search"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def item_search_result(req, p, f, ln=CFG_SITE_LANG):
    """
    Search an item and return a list with all the possible results.
    To retrieve the information desired, we use the method
    'perform_request_search' (from search_engine.py). In the case of
    BibCirculation, we are just looking for books (items) inside the
    collection 'Books'.
    @type p:  string
    @param p: search pattern

    @type f:  string
    @param f: search field
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    infos = []

    if p == '':
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        return item_search(req, infos, ln)

    if f == 'barcode':
        p = p.strip('\'" \t')
        recid = db.get_recid(p)

        if recid is None:
            infos.append(_('The barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s does not exist on BibCirculation database.') % {'x_barcode': p, 'x_strong_tag_open': '', 'x_strong_tag_close': ''})
            body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
        else:
            return get_item_details(req, recid, ln=ln)

    elif f == 'recid':
        p = p.strip('\'" \t')
        recid = p

        if not record_exists(recid):
            infos.append(_("Requested record does not seem to exist."))
            body = bc_templates.tmpl_item_search(infos=infos, ln=ln)
        else:
            return get_item_details(req, recid, ln=ln)

    else:
        result = perform_request_search(cc="Books", sc="1", p=p, f=f)
        body = bc_templates.tmpl_item_search_result(result=result, ln=ln)

    navtrail_previous_links = 'Admin Area' \
                              ' > ' \
                              'Circulation Management' \
                              ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    return page(title=_("Item search result"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def load_template(template):
    """
    Load a letter/notification template from
    bibcirculation_config.py.

    @type template:  string.
    @param template: template that will be used.

    @return: template(string)
    """
    if template == "overdue_letter":
        output = CFG_BIBCIRCULATION_TEMPLATES['OVERDUE']

    elif template == "reminder":
        output = CFG_BIBCIRCULATION_TEMPLATES['REMINDER']

    elif template == "notification":
        output = CFG_BIBCIRCULATION_TEMPLATES['NOTIFICATION']

    elif template == "claim_return":
        output = CFG_BIBCIRCULATION_TEMPLATES['SEND_RECALL']

    else:
        output = CFG_BIBCIRCULATION_TEMPLATES['EMPTY']

    return output
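# Usage sketch for load_template() above (hypothetical loan_id): the selector
# strings map onto keys of CFG_BIBCIRCULATION_TEMPLATES and the result can be
# fed to generate_email_body() from bibcirculation_utils.
#
#     template = load_template('claim_return')  # -> TEMPLATES['SEND_RECALL']
#     email_body = generate_email_body(template, 123)
#
# Unknown selectors deliberately fall back to the 'EMPTY' template.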
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) if borrower_email != None: email = borrower_email else: email = db.get_borrower_email(borrower_id) if load_msg_template and template is not None: show_template = load_template(template) elif send_message: send_email(fromaddr = CFG_BIBCIRCULATION_LIBRARIAN_EMAIL, toaddr = email, subject = subject, content = message, header = '', footer = '', attempt_times = 1, attempt_sleeptime = 10 ) body = bc_templates.tmpl_send_notification(ln=ln) else: show_template = load_template(template) body = bc_templates.tmpl_borrower_notification(email=email, subject=subject, email_body=show_template, borrower_id=borrower_id, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) return page(title="Borrower Notification", uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_next_waiting_loan_request(req, recid, barcode, check_id, ln=CFG_SITE_LANG): """ *** Obsolete and unmantained function *** Return the next loan request that is waiting or pending. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type check_id: integer. @param check_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST. @return: list of waiting requests with the same recid. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if check_id: db.update_loan_request_status(check_id, CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED) #update_request_data(check_id) else: db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, barcode) db.return_loan(barcode) update_requests_statuses(barcode) result = db.get_next_waiting_loan_request(recid) navtrail_previous_links = 'Admin Area' \ ' > Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL) body = bc_templates.tmpl_get_next_waiting_loan_request(result=result, recid=recid, barcode=barcode, ln=ln) return page(title=_("Next requests"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def make_new_loan_from_request(req, check_id, barcode, ln=CFG_SITE_LANG): """ Turns a request into a loan. @type check_id: integer. @param check_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] recid = db.get_request_recid(check_id) borrower_id = db.get_request_borrower_id(check_id) borrower_info = db.get_borrower_details(borrower_id) due_date = renew_loan_for_X_days(barcode) if db.is_item_on_loan(barcode): infos.append('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.' 
                     % {'x_barcode': barcode,
                        'x_strong_tag_open': '',
                        'x_strong_tag_close': ''})
        return redirect_to_url(req,
            '%s/admin2/bibcirculation/all_loans?ln=%s&msg=ok' % (CFG_SITE_SECURE_URL, ln))
    else:
        db.new_loan(borrower_id, recid, barcode, due_date,
                    CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, 'normal', '')
        infos.append(_('A new loan has been registered with success.'))

        #try:
        #    register_customevent("baskets", ["display", "", user_str])
        #except:
        #    register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'")

        tag_all_requests_as_done(barcode, borrower_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)

    navtrail_previous_links = 'Admin Area' \
                              ' > ' \
                              'Circulation Management' \
                              ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    body = bc_templates.tmpl_register_new_loan(borrower_info=borrower_info,
                                               infos=infos,
                                               recid=recid,
                                               ln=ln)

    return page(title=_("New Loan"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def loan_return(req, ln=CFG_SITE_LANG):
    """
    Page where it is possible to register the return of an item.
    """
    _ = gettext_set_language(ln)
    infos = []

    navtrail_previous_links = 'Admin Area' \
                              ' > Circulation Management' \
                              ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)

    return page(title=_("Loan return"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def loan_return_confirm(req, barcode, ln=CFG_SITE_LANG):
    """
    Performs the return of a loan and displays a confirmation page.
    In case the book is requested, it is possible to select a request
    and make a loan from it (make_new_loan_from_request)

    @type barcode:  string.
    @param barcode: identify the item. It is the primary key of the table
                    crcITEM.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    infos = []
    _ = gettext_set_language(ln)

    recid = db.get_id_bibrec(barcode)
    loan_id = db.is_item_on_loan(barcode)

    if recid is None:
        infos.append(_('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': barcode, 'x_strong_tag_open': '', 'x_strong_tag_close': ''} + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)
    elif loan_id is None:
        message = _("The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is not on loan.
                    Please, try again.") % {'x_barcode': barcode, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}
        infos.append(message)
        body = bc_templates.tmpl_loan_return(infos=infos, ln=ln)
    else:
        library_id = db.get_item_info(barcode)[1]
        if CFG_CERN_SITE:
            library_type = db.get_library_type(library_id)
            if library_type != CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN:
                library_name = db.get_library_name(library_id)
                message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s location is %(x_strong_tag_open)s%(x_location)s%(x_strong_tag_close)s") % {'x_barcode': barcode, 'x_strong_tag_open': '', 'x_strong_tag_close': '', 'x_location': library_name}
                infos.append(message)

        borrower_id = db.get_borrower_id(barcode)
        borrower_name = db.get_borrower_name(borrower_id)

        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, barcode)
        db.return_loan(barcode)
        update_requests_statuses(barcode)

        result = db.get_next_waiting_loan_request(recid)
        body = bc_templates.tmpl_loan_return_confirm(
                                            infos=infos,
                                            borrower_name=borrower_name,
                                            borrower_id=borrower_id,
                                            recid=recid,
                                            barcode=barcode,
                                            return_date=datetime.date.today(),
                                            result=result,
                                            ln=ln)

    navtrail_previous_links = 'Admin Area' \
                              '' % (CFG_SITE_SECURE_URL,)

    return page(title=_("Loan return"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def loan_on_desk_step1(req, key, string, ln=CFG_SITE_LANG):
    """
    Step 1/4 of the loan procedure.
    Search a user/borrower and return a list with all the possible results.

    @type key:  string.
    @param key: attribute that will be considered during the search.
                Can be 'name', 'email' or 'ccid/id'.

    @type string:  string.
    @param string: keyword used during the search.

    @return: list of potential borrowers.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    infos = []
    _ = gettext_set_language(ln)

    if key and not string:
        infos.append(_('Empty string. Please, try again.'))
        body = bc_templates.tmpl_loan_on_desk_step1(result=None, key=key,
                                                    string=string,
                                                    infos=infos, ln=ln)
        navtrail_previous_links = 'Admin Area' \
                                  '' % (CFG_SITE_SECURE_URL,)

        return page(title=_("Loan on desk"),
                    uid=id_user,
                    req=req,
                    body=body, language=ln,
                    navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    result = search_user(key, string)
    borrowers_list = []

    if len(result) == 0 and key:
        if CFG_CERN_SITE:
            infos.append(_("0 borrowers found.") + ' ' + _("Search by CCID."))
        else:
            new_borrower_link = create_html_link(CFG_SITE_SECURE_URL +
                                '/admin2/bibcirculation/add_new_borrower_step1',
                                {'ln': ln}, _("Register new borrower."))
            message = _("0 borrowers found.") + ' ' + new_borrower_link
            infos.append(message)
    elif len(result) == 1:
        return loan_on_desk_step2(req, result[0][0], ln)
    else:
        for user in result:
            borrower_data = db.get_borrower_data_by_id(user[0])
            borrowers_list.append(borrower_data)

    body = bc_templates.tmpl_loan_on_desk_step1(result=borrowers_list,
                                                key=key,
                                                string=string,
                                                infos=infos,
                                                ln=ln)

    navtrail_previous_links = 'Admin Area' \
                              '' % (CFG_SITE_SECURE_URL,)

    return page(title=_("Circulation management"),
                uid=id_user,
                req=req,
                body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def loan_on_desk_step2(req, user_id, ln=CFG_SITE_LANG):
    """
    Step 2/4 of the loan procedure.
    Display the user/borrower's information.

    @type user_id:  integer
    @param user_id: identify the borrower. It is also the primary key of
                    the table crcBORROWER.
""" navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) infos = [] _ = gettext_set_language(ln) body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id, infos=infos, ln=ln) return page(title=_("Circulation management"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def loan_on_desk_step3(req, user_id, list_of_barcodes, ln=CFG_SITE_LANG): """ Step 3/4 of loan procedure. Checks that the barcodes exist and that there are no request on these records. Lets the librarian change the due dates and add notes. @type user_id: integer @param user_id: identify the borrower. It is also the primary key of the table crcBORROWER. @type list_of_barcodes: list @param list_of_barcodes: list of strings with the barcodes introduced by the librarian with the barcode reader """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] list_of_books = [] # to avoid duplicates aux = [] for bc in list_of_barcodes: if bc not in aux: aux.append(bc) list_of_barcodes = aux for value in list_of_barcodes: recid = db.get_id_bibrec(value) loan_id = db.is_item_on_loan(value) queue = db.get_queue_request(recid) if recid is None: infos.append(_('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': value, 'x_strong_tag_open': '', 'x_strong_tag_close': ''} + ' ' + _('Please, try again.')) body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id, infos=infos, ln=ln) elif loan_id: infos.append('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.' % {'x_barcode': value, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id, infos=infos, ln=ln) elif user_id is None: infos.append(_('You must select one borrower.')) body = bc_templates.tmpl_loan_on_desk_step1(result=None, key='', string='', infos=infos, ln=ln) else: (library_id, location) = db.get_lib_location(value) tup = (recid, value, library_id, location) list_of_books.append(tup) book_details = db.get_item_info(value) item_status = book_details[7] if item_status != CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF: message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s status is %(x_strong_tag_open)s%(x_status)s%(x_strong_tag_close)s") % {'x_barcode': value, 'x_strong_tag_open': '', 'x_strong_tag_close': '', 'x_status': item_status} infos.append(message) if CFG_CERN_SITE: library_type = db.get_library_type(library_id) if library_type != CFG_BIBCIRCULATION_LIBRARY_TYPE_MAIN: library_name = db.get_library_name(library_id) message = _("%(x_strong_tag_open)sWARNING:%(x_strong_tag_close)s Note that item %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s location is %(x_strong_tag_open)s%(x_location)s%(x_strong_tag_close)s") % {'x_barcode': value, 'x_strong_tag_open': '', 'x_strong_tag_close': '', 'x_location': library_name} infos.append(message) if len(queue) != 0 and queue[0][0] != user_id: message = _("Another user is waiting for the book: %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s. 
\n\n If you want continue with this loan choose %(x_strong_tag_open)s[Continue]%(x_strong_tag_close)s.") % {'x_title': book_title_from_MARC(recid), 'x_strong_tag_open': '', 'x_strong_tag_close': ''} infos.append(message) body = bc_templates.tmpl_loan_on_desk_step3(user_id=user_id, list_of_books=list_of_books, infos=infos, ln=ln) if list_of_barcodes == []: infos.append(_('Empty barcode.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_loan_on_desk_step2(user_id=user_id, infos=infos, ln=ln) if infos == []: # shortcut to simplify loan process due_dates = [] for bc in list_of_barcodes: due_dates.append(renew_loan_for_X_days(bc)) return loan_on_desk_step4(req, list_of_barcodes, user_id, due_dates, None, ln) else: return page(title=_("Circulation management"), uid=id_user, req=req, body=body, metaheaderadd = "" % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def loan_on_desk_step4(req, list_of_barcodes, user_id, due_date, note, ln=CFG_SITE_LANG): """ Step 4/4 of loan procedure. Checks that items are not on loan and that the format of the dates is correct and creates the loans @type user_id: integer @param user_id: identify the borrower. It is also the primary key of the table crcBORROWER. @type list_of_barcodes: list @param list_of_barcodes: list of strings with the barcodes introduced by the librarian with the barcode reader @type due_date: list. @param due_date: list of due dates. @type note: string. @param note: note about the new loan. @return: page with the list 'Last Loans' """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] #loaned_on = datetime.date.today() #Check if one of the given items is on loan. 
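# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original module: the order-preserving
# de-duplication that loan_on_desk_step3() performs inline with an `aux`
# accumulator, factored into a standalone helper. The helper name is
# hypothetical; the behaviour matches the loop above.
# ---------------------------------------------------------------------------
def _dedup_preserving_order(items):
    """Return `items` without duplicates, keeping first occurrences."""
    seen = []
    for item in items:
        if item not in seen:
            seen.append(item)
    return seen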
def loan_on_desk_step4(req, list_of_barcodes, user_id, due_date, note, ln=CFG_SITE_LANG):
    """
    Step 4/4 of the loan procedure.
    Checks that the items are not already on loan, validates the format of
    the due dates, and creates the loans.

    @type user_id: integer
    @param user_id: identify the borrower. It is also the primary key of the table crcBORROWER.
    @type list_of_barcodes: list
    @param list_of_barcodes: list of strings with the barcodes introduced by the librarian with the barcode reader.
    @type due_date: list.
    @param due_date: list of due dates.
    @type note: string.
    @param note: note about the new loan.

    @return: page with the list 'Last Loans'
    """
    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    infos = []

    #loaned_on = datetime.date.today()

    # Check if one of the given items is on loan.
    on_loan = []
    for barcode in list_of_barcodes:
        is_on_loan = db.is_item_on_loan(barcode)
        if is_on_loan:
            on_loan.append(barcode)

    if len(on_loan) != 0:
        message = _("The items with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s are already on loan.") % {'x_barcode': on_loan, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
        infos.append(message)

        body = bc_templates.tmpl_loan_on_desk_step1(result=None, key='',
                                                    string='', infos=infos,
                                                    ln=ln)

        return page(title=_("Loan on desk"), uid=id_user, req=req, body=body,
                    language=ln, navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)

    # validate the due dates given by the admin
    for date in due_date:
        if validate_date_format(date) is False:
            infos = []
            message = _("The given due date %(x_strong_tag_open)s%(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': date, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'}
            infos.append(message)

            list_of_books = []
            for bc in list_of_barcodes:
                recid = db.get_id_bibrec(bc)
                (library_id, location) = db.get_lib_location(bc)
                tup = (recid, bc, library_id, location)
                list_of_books.append(tup)

            body = bc_templates.tmpl_loan_on_desk_step3(user_id=user_id,
                                                        list_of_books=list_of_books,
                                                        infos=infos, ln=ln)
            return page(title=_("Circulation management"), uid=id_user,
                        req=req, body=body, language=ln,
                        navtrail=navtrail_previous_links,
                        lastupdated=__lastupdated__)

    #if borrower_id == None:
    #    db.new_borrower(ccid, name, email, phone, address, mailbox, '')
    #    borrower_id = db.get_borrower_id_by_email(email)

    for i in range(len(list_of_barcodes)):
        note_format = {}
        if note:
            note_format[time.strftime("%Y-%m-%d %H:%M:%S")] = str(note)
        barcode = list_of_barcodes[i]
        recid = db.get_recid(barcode)
        db.new_loan(user_id, recid, barcode, due_date[i],
                    CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, 'normal', note_format)
        tag_all_requests_as_done(barcode, user_id)
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
        update_requests_statuses(barcode)

    return redirect_to_url(req, '%s/admin2/bibcirculation/all_loans?ln=%s&msg=ok' % (CFG_SITE_SECURE_URL, ln))

def loan_on_desk_confirm(req, barcode=None, borrower_id=None, ln=CFG_SITE_LANG):
    """
    *** Obsolete and unmaintained function ***
    Confirm the return of an item.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table crcITEM.
    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER.
    """
    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)
    result = db.loan_on_desk_confirm(barcode, borrower_id)

    body = bc_templates.tmpl_loan_on_desk_confirm(result=result,
                                                  barcode=barcode,
                                                  borrower_id=borrower_id,
                                                  ln=ln)

    return page(title=_("Loan on desk confirm"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
def register_new_loan(req, barcode, borrower_id, request_id,
                      new_note, print_data, ln=CFG_SITE_LANG):
    """
    Register a new loan. This function is from the "Associate barcode" pages.

    @type barcode: string.
    @param barcode: identify the item. It is the primary key of the table crcITEM.
    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER.
    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST.
    @type new_note: string.
    @param new_note: associate a note to this loan.
    @type print_data: string.
    @param print_data: print the information about this loan.

    @return: new loan
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    has_recid = db.get_id_bibrec(barcode)
    loan_id = db.is_item_on_loan(barcode)

    recid = db.get_request_recid(request_id)
    list_of_barcodes = db.get_barcodes(recid)

    infos = []

    if print_data == 'true':
        return print_new_loan_information(req, ln)
    else:
        if has_recid is None:
            message = _('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'} + ' ' + _('Please, try again.')
            infos.append(message)
            borrower = db.get_borrower_details(borrower_id)
            title = _("Associate barcode")
            body = bc_templates.tmpl_associate_barcode(request_id=request_id,
                                                       recid=recid,
                                                       borrower=borrower,
                                                       infos=infos, ln=ln)
        elif loan_id:
            infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '<strong>', 'x_strong_tag_close': '</strong>'})
            borrower = db.get_borrower_details(borrower_id)
            title = _("Associate barcode")
            body = bc_templates.tmpl_associate_barcode(request_id=request_id,
                                                       recid=recid,
                                                       borrower=borrower,
                                                       infos=infos, ln=ln)
        elif barcode not in list_of_barcodes:
            infos.append(_('The given barcode "%(x_barcode)s" does not correspond to the requested item.') % {'x_barcode': barcode})
            borrower = db.get_borrower_details(borrower_id)
            title = _("Associate barcode")
            body = bc_templates.tmpl_associate_barcode(request_id=request_id,
                                                       recid=recid,
                                                       borrower=borrower,
                                                       infos=infos, ln=ln)
        else:
            recid = db.get_id_bibrec(barcode)
            #loaned_on = datetime.date.today()
            due_date = renew_loan_for_X_days(barcode)

            if new_note:
                note_format = '[' + time.ctime() + '] ' + new_note + '\n'
            else:
                note_format = ''

            last_id = db.new_loan(borrower_id, recid, barcode, due_date,
                                  CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN,
                                  'normal', note_format)

            # register event in webstat
            try:
                register_customevent("loanrequest", [request_id, last_id])
            except:
                register_exception(suffix="Do the webstat tables exist? Try with 'webstatadmin --load-config'")

            #requested_barcode = db.get_requested_barcode(request_id)
            tag_all_requests_as_done(barcode, borrower_id)
            db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode)
            db.update_loan_request_status(request_id, CFG_BIBCIRCULATION_REQUEST_STATUS_DONE)
            db.update_request_barcode(barcode, request_id)
            update_requests_statuses(barcode)

            result = db.get_all_loans(20)
            infos.append(_('A new loan has been registered successfully.'))
            title = _("Current loans")
            body = bc_templates.tmpl_all_loans(result=result, infos=infos, ln=ln)

        navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

        return page(title=title, uid=id_user, req=req, body=body,
                    language=ln, navtrail=navtrail_previous_links,
                    lastupdated=__lastupdated__)
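# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original module: the two note formats
# used above. register_new_loan() stores a single '[ctime] text\n' string,
# while loan_on_desk_step4() stores a {timestamp: text} dictionary. The
# helper names are hypothetical; `time` is already imported by this module.
# ---------------------------------------------------------------------------
def _demo_note_as_string(text):
    """Note shaped the way register_new_loan() shapes it."""
    return '[' + time.ctime() + '] ' + text + '\n'

def _demo_note_as_dict(text):
    """Note shaped the way loan_on_desk_step4() and the notes pages shape it."""
    return {time.strftime("%Y-%m-%d %H:%M:%S"): str(text)}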
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if update and CFG_CERN_SITE: update_user_info_from_ldap(borrower_id) borrower = db.get_borrower_details(borrower_id) if borrower == None: info = _('Borrower not found.') + ' ' + _('Please, try again.') return borrower_search(req, info, False, ln) else: requests = db.get_borrower_request_details(borrower_id) loans = db.get_borrower_loan_details(borrower_id) notes = db.get_borrower_notes(borrower_id) ill = db.get_ill_requests_details(borrower_id) req_hist = db.bor_requests_historical_overview(borrower_id) loans_hist = db.bor_loans_historical_overview(borrower_id) ill_hist = db.bor_ill_historical_overview(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_borrower_details(borrower=borrower, requests=requests, loans=loans, notes=notes, ill=ill, req_hist=req_hist, loans_hist=loans_hist, ill_hist=ill_hist, ln=ln) return page(title=_("Borrower details"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_borrower_loans_details(req, recid, barcode, borrower_id, renewall, force, loan_id, ln=CFG_SITE_LANG): """ Show borrower's loans details. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type borrower_id: integer. @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. @type renewall: string. @param renewall: renew all loans. @type force: string. @param force: force the renew of a loan, when usually this is not possible. @type loan_id: integer. @param loan_id: identify a loan. It is the primery key of the table crcLOAN. @return: borrower loans details. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] force_renew_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/get_borrower_loans_details', {'barcode': barcode, 'borrower_id': borrower_id, 'loan_id': loan_id, 'force': 'true', 'ln': ln}, (_("Yes"))) no_renew_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/get_borrower_loans_details', {'borrower_id': borrower_id, 'ln': ln}, (_("No"))) if barcode and loan_id and recid: queue = db.get_queue_request(recid) new_due_date = renew_loan_for_X_days(barcode) if len(queue) != 0: title = book_title_from_MARC(recid) message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '', 'x_strong_tag_close': ''} message += '\n\n' message += _("Do you want renew this loan anyway?") message += '\n\n' message += "[%s] [%s]" % (force_renew_link, no_renew_link) infos.append(message) else: #db.update_due_date(loan_id, new_due_date) db.renew_loan(loan_id, new_due_date) #update_status_if_expired(loan_id) infos.append(_("Loan renewed with success.")) elif loan_id and barcode and force == 'true': new_due_date = renew_loan_for_X_days(barcode) db.renew_loan(loan_id, new_due_date) update_status_if_expired(loan_id) infos.append(_("Loan renewed with success.")) elif borrower_id and renewall=='true': list_of_loans = db.get_recid_borrower_loans(borrower_id) for (loan_id, recid, barcode) in list_of_loans: queue = db.get_queue_request(recid) new_due_date = renew_loan_for_X_days(barcode) force_renewall_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/get_borrower_loans_details', {'barcode': barcode, 'borrower_id': borrower_id, 'loan_id': loan_id, 'force': 'true', 'ln': ln}, (_("Yes"))) if len(queue) != 0: title = book_title_from_MARC(recid) message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '', 'x_strong_tag_close': ''} message += '\n\n' message += _("Do you want renew this loan anyway?") message += '\n\n' message += "[%s] [%s]" % (force_renewall_link, no_renew_link) infos.append(message) else: db.renew_loan(loan_id, new_due_date) update_status_if_expired(loan_id) if infos == []: infos.append(_("All loans renewed with success.")) borrower_loans = db.get_borrower_loan_details(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_borrower_loans_details( borrower_loans=borrower_loans, borrower_id=borrower_id, infos=infos, ln=ln) return page(title=_("Loans details") + \ " - %s" %(db.get_borrower_name(borrower_id)), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_item_loans_details(req, recid, barcode, loan_id, force, ln=CFG_SITE_LANG): """ Show all the details about all current loans related with a record. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type loan_id: integer. @param loan_id: identify a loan. It is the primery key of the table crcLOAN. @type force: string. @param force: force the renew of a loan, when usually this is not possible. @return: item loans details. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if loan_id and barcode and force == 'true': new_due_date = renew_loan_for_X_days(barcode) #db.update_due_date(loan_id, new_due_date) db.renew_loan(loan_id, new_due_date) update_status_if_expired(loan_id) infos.append(_("Loan renewed with success.")) elif barcode: recid = db.get_id_bibrec(barcode) queue = db.get_queue_request(recid) new_due_date = renew_loan_for_X_days(barcode) force_renew_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/get_item_loans_details', {'barcode': barcode, 'loan_id': loan_id, 'force': 'true', 'recid': recid, 'ln': ln}, (_("Yes"))) no_renew_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/get_item_loans_details', {'recid': recid, 'ln': ln}, (_("No"))) if len(queue) != 0: title = book_title_from_MARC(recid) message = _("Another user is waiting for this book %(x_strong_tag_open)s%(x_title)s%(x_strong_tag_close)s.") % {'x_title': title, 'x_strong_tag_open': '', 'x_strong_tag_close': ''} message += '\n\n' message += _("Do you want renew this loan anyway?") message += '\n\n' message += "[%s] [%s]" % (force_renew_link, no_renew_link) infos.append(message) else: db.renew_loan(loan_id, new_due_date) #db.update_due_date(loan_id, new_due_date) update_status_if_expired(loan_id) infos.append(_("Loan renewed with success.")) result = db.get_item_loans(recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_get_item_loans_details(result=result, recid=recid, infos=infos, ln=ln) return page(title=_("Loans details") + \ " - %s" % (book_title_from_MARC(int(recid))), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_item_details(req, recid, ln=CFG_SITE_LANG): """ Display the details of an item. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @return: item details. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) id_user = 1 infos = [] if recid == None: infos.append(_("Record id not valid")) copies = db.get_item_copies_details(recid) requests = db.get_item_requests(recid) loans = db.get_item_loans(recid) req_hist_overview = db.get_item_requests_historical_overview(recid) loans_hist_overview = db.get_item_loans_historical_overview(recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_get_item_details(recid=recid, copies=copies, requests=requests, loans=loans, req_hist_overview=req_hist_overview, loans_hist_overview=loans_hist_overview, infos=infos, ln=ln) return page(title=_("Item details"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_item_req_historical_overview(req, recid, ln=CFG_SITE_LANG): """ Display the requests historical overview of an item. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @return: Item requests - historical overview. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) req_hist_overview = db.get_item_requests_historical_overview(recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_get_item_req_historical_overview( req_hist_overview=req_hist_overview, ln=ln) return page(title=_("Requests") + " - " + _("historical overview"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_item_loans_historical_overview(req, recid, ln=CFG_SITE_LANG): """ Display the loans historical overview of an item. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @return: Item loans - historical overview. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) loans_hist_overview = db.get_item_loans_historical_overview(recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_get_item_loans_historical_overview( loans_hist_overview=loans_hist_overview, ln=ln) return page(title=_("Loans") + " - " + _("historical overview"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def bor_loans_historical_overview(req, borrower_id, ln=CFG_SITE_LANG): """ Display the loans historical overview of a borrower. @type borrower_id: integer. @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. @return: borrower loans - historical overview. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) loans_hist_overview = db.bor_loans_historical_overview(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_bor_loans_historical_overview( loans_hist_overview = loans_hist_overview, ln=ln) return page(title=_("Loans") + " - " + _("historical overview"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def bor_requests_historical_overview(req, borrower_id, ln=CFG_SITE_LANG): """ Display the requests historical overview of a borrower. @type borrower_id: integer. @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. @return: borrower requests - historical overview. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) req_hist_overview = db.bor_requests_historical_overview(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_bor_requests_historical_overview( req_hist_overview = req_hist_overview, ln=ln) return page(title=_("Requests") + " - " + _("historical overview"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_library_details(req, library_id, ln=CFG_SITE_LANG): """ Display the details of a library. @type library_id: integer. @param library_id: identify the library. It is also the primary key of the table crcLIBRARY. @return: library details. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) library_details = db.get_library_details(library_id) if library_details is None: _ = gettext_set_language(ln) infos = [] infos.append(_('Library ID not found.')) return search_library_step1(req, infos, ln) library_items = db.get_library_items(library_id) body = bc_templates.tmpl_library_details(library_details=library_details, library_items=library_items, ln=ln) return page(title=_("Library details"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def merge_libraries_step1(req, library_id, f=None, p=None, ln=CFG_SITE_LANG): """ Step 1/3 of library merging procedure @param library_id: ID of the library to be deleted @param p: search pattern. @param f: field """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) library_details = db.get_library_details(library_id) library_items = db.get_library_items(library_id) result = None if f is not None: if p in (None, '', '*'): result = db.get_all_libraries() #list of (id, name) elif f == 'name': result = db.search_library_by_name(p) elif f == 'email': result = db.search_library_by_email(p) body = bc_templates.tmpl_merge_libraries_step1( library_details=library_details, library_items=library_items, result=result, p=p, ln=ln) return page(title=_("Merge libraries"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def merge_libraries_step2(req, library_from, library_to, ln=CFG_SITE_LANG): """ Step 2/3 of library merging procedure Confirm the libraries selected @param library_from: ID of the library to be deleted @param library_to: ID of the resulting library """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) library_from_details = db.get_library_details(library_from) library_from_items = db.get_library_items(library_from) library_to_details = db.get_library_details(library_to) library_to_items = db.get_library_items(library_to) body = bc_templates.tmpl_merge_libraries_step2( library_from_details=library_from_details, library_from_items=library_from_items, library_to_details=library_to_details, library_to_items=library_to_items, ln=ln) return page(title=_("Merge libraries"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def merge_libraries_step3(req, library_from, library_to, ln=CFG_SITE_LANG): """ Step 3/3 of library merging procedure Perform the merge and display the details of the resulting library @param library_from: ID of the library to be deleted @param library_to: ID of the resulting library """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) db.merge_libraries(library_from, library_to) return get_library_details(req, library_to, ln) def get_borrower_requests_details(req, borrower_id, request_id, ln=CFG_SITE_LANG): """ Display loans details of a borrower. @type borrower_id: integer. @param borrower_id: identify the borrower. 
                         It is also the primary key of the table crcBORROWER.
    @type request_id: integer.
    @param request_id: identify the hold request to be cancelled.

    @return: borrower requests details.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if request_id:
        db.cancel_request(request_id)
        #update_request_data(request_id)

    result = db.get_borrower_request_details(borrower_id)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    name = db.get_borrower_name(borrower_id)

    title = _("Hold requests details") + " - %s" % (name)
    body = bc_templates.tmpl_borrower_request_details(result=result,
                                                      borrower_id=borrower_id,
                                                      ln=ln)

    return page(title=title, uid=id_user, req=req, body=body, language=ln,
                navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)

def get_pending_requests(req, request_id, print_data, ln=CFG_SITE_LANG):
    """
    Get all loan requests that are pending.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST.
    @type print_data: string.
    @param print_data: print requests information.

    @return: list of pending requests (on shelf with hold).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if print_data == 'true':
        return print_pending_hold_requests_information(req, ln)
    elif request_id:
        db.update_loan_request_status(request_id, CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED)
        barcode = db.get_request_barcode(request_id)
        update_requests_statuses(barcode)
        result = db.get_loan_request_by_status(CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)
    else:
        result = db.get_loan_request_by_status(CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_get_pending_requests(result=result, ln=ln)

    return page(title=_("Items on shelf with holds"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)

def get_waiting_requests(req, request_id, print_data, ln=CFG_SITE_LANG):
    """
    Get all loan requests that are waiting.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST.
    @type print_data: string.
    @param print_data: print requests information.

    @return: list of waiting requests (on loan with hold).
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if print_data == 'true':
        return print_pending_hold_requests_information(req, ln)
    elif request_id:
        db.update_loan_request_status(request_id, CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED)

    result = db.get_loan_request_by_status(CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING)

    aux = ()
    for request in result:
        if db.get_nb_copies_on_loan(request[1]):
            aux += (request,)
    result = aux

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_get_waiting_requests(result=result, ln=ln)

    return page(title=_("Items on loan with holds"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)

def all_requests(req, request_id, ln=CFG_SITE_LANG):
    """
    Display all requests.

    @type request_id: integer.
    @param request_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST.
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if request_id: db.update_loan_request_status(request_id, CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED) #update_request_data(request_id) result = db.get_all_requests() else: result = db.get_all_requests() navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_all_requests(result=result, ln=ln) return page(title=_("List of hold requests"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def all_loans(req, msg=None, ln=CFG_SITE_LANG): """ Display all loans. @type loans_per_page: integer. @param loans_per_page: number of loans per page. @type jloan: integer. @param jloan: jump to next loan. @return: list with all loans (current loans). """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if msg == 'ok': infos.append(_('A new loan has been registered with success.')) result = db.get_all_loans(20) navtrail_previous_links = 'Admin Area' \ ' > ' \ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) body = bc_templates.tmpl_all_loans(result=result, infos=infos, ln=ln) return page(title=_("Current loans"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def all_expired_loans(req, ln=CFG_SITE_LANG): """ Display all loans. @type loans_per_page: integer. @param loans_per_page: number of loans per page. @return: list with all expired loans (overdue loans). """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) result = db.get_all_expired_loans() infos = [] navtrail_previous_links = 'Admin Area' \ ' > ' \ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) body = bc_templates.tmpl_all_expired_loans(result=result, infos=infos, ln=ln) return page(title=_('Overdue loans'), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_item_requests_details(req, recid, request_id, ln=CFG_SITE_LANG): """ Display all requests for a specific item. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type request_id: integer. @param request_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST. @return: Item requests details. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if request_id: db.cancel_request(request_id) #update_request_data(request_id) result = db.get_item_requests(recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_get_item_requests_details(recid=recid, result=result, ln=ln) return page(title=_("Hold requests") + \ " - %s" % (book_title_from_MARC(recid)), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def associate_barcode(req, request_id, recid, borrower_id, ln=CFG_SITE_LANG): """ Associate a barcode to an hold request. @type request_id: integer. @param request_id: identify the hold request. It is also the primary key of the table crcLOANREQUEST. 
    @type recid: integer.
    @param recid: identify the record. It is also the primary key of the table bibrec.
    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    borrower = db.get_borrower_details(borrower_id)
    infos = []

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_associate_barcode(request_id=request_id,
                                               recid=recid,
                                               borrower=borrower,
                                               infos=infos, ln=ln)

    return page(title=_("Associate barcode"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)

def get_borrower_notes(req, borrower_id, delete_key, library_notes, ln=CFG_SITE_LANG):
    """
    Retrieve the notes of a borrower.

    @type borrower_id: integer.
    @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if delete_key and borrower_id:
        borrower_notes = eval(db.get_borrower_notes(borrower_id))
        if delete_key in borrower_notes.keys():
            del borrower_notes[delete_key]
            db.update_borrower_notes(borrower_id, borrower_notes)
    elif library_notes:
        if db.get_borrower_notes(borrower_id):
            if looks_like_dictionary(db.get_borrower_notes(borrower_id)):
                borrower_notes = eval(db.get_borrower_notes(borrower_id))
            else:
                borrower_notes = {}
        else:
            borrower_notes = {}

        note_time = time.strftime("%Y-%m-%d %H:%M:%S")
        if note_time not in borrower_notes.keys():
            borrower_notes[note_time] = str(library_notes)
            db.update_borrower_notes(borrower_id, borrower_notes)

    borrower_notes = db.get_borrower_notes(borrower_id)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">Admin Area</a>' \
                              ' &gt; ' \
                              '<a class="navtrail" href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">Circulation Management</a>' \
                              ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln)

    body = bc_templates.tmpl_borrower_notes(borrower_notes=borrower_notes,
                                            borrower_id=borrower_id, ln=ln)

    return page(title=_("Borrower notes"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
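# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original module: get_borrower_notes()
# above round-trips the notes dictionary through str()/eval(). The standard
# library's ast.literal_eval accepts the same literal syntax while refusing
# arbitrary code, so a hardened parser could look like this. This is an
# assumption about a possible replacement, not what the module does today.
# ---------------------------------------------------------------------------
def _parse_stored_notes(raw):
    """Parse a notes dict stored as its repr(); return {} when unparsable."""
    import ast
    try:
        notes = ast.literal_eval(raw)
    except (SyntaxError, ValueError):
        return {}
    return notes if isinstance(notes, dict) else {}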
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if delete_key and loan_id: loans_notes = eval(db.get_loan_notes(loan_id)) if delete_key in loans_notes.keys(): del loans_notes[delete_key] db.update_loan_notes(loan_id, loans_notes) elif library_notes: if db.get_loan_notes(loan_id): if looks_like_dictionary(db.get_loan_notes(loan_id)): loans_notes = eval(db.get_loan_notes(loan_id)) else: loans_notes = {} else: loans_notes = {} note_time = time.strftime("%Y-%m-%d %H:%M:%S") if note_time not in loans_notes.keys(): loans_notes[note_time] = str(library_notes) db.update_loan_notes(loan_id, loans_notes) loans_notes = db.get_loan_notes(loan_id) navtrail_previous_links = 'Admin Area' \ ' > ' \ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) referer = req.headers_in.get('referer') body = bc_templates.tmpl_get_loans_notes(loans_notes=loans_notes, loan_id=loan_id, referer=referer, back=back, ln=ln) return page(title=_("Loan notes"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_borrower_step1(req, ln=CFG_SITE_LANG): """ Add new borrower. Step 1 """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_add_new_borrower_step1(ln=ln) return page(title=_("Add new borrower") + " - I", uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_borrower_step2(req, name, email, phone, address, mailbox, notes, ln=CFG_SITE_LANG): """ Add new borrower. Step 2. @type name: string. @type email: string. @type phone: string. @type address: string. @type mailbox: string. @type notes: string. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if name == '': infos.append(_("Please, insert a name")) if email == '': infos.append(_("Please, insert a valid email address")) else: borrower_id = db.get_borrower_id_by_email(email) if borrower_id is not None: infos.append(_("There is already a borrower using the following email:") + " %s" % (email)) tup_infos = (name, email, phone, address, mailbox, notes) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) if len(infos) > 0: body = bc_templates.tmpl_add_new_borrower_step1(tup_infos=tup_infos, infos=infos, ln=ln) title = _("Add new borrower") + " - I" else: if notes != '': borrower_notes = {} note_time = time.strftime("%Y-%m-%d %H:%M:%S") borrower_notes[note_time] = notes else: borrower_notes = '' borrower_id = db.new_borrower(None, name, email, phone, address, mailbox, borrower_notes) return redirect_to_url(req, '%s/admin2/bibcirculation/get_borrower_details?ln=%s&borrower_id=%s' \ % (CFG_SITE_SECURE_URL, ln, borrower_id)) #body = bc_templates.tmpl_add_new_borrower_step2(tup_infos=tup_infos, # infos=infos, ln=ln) #title = _("Add new borrower") + " - II" return page(title=title, uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_borrower_step3(req, tup_infos, ln=CFG_SITE_LANG): """ Add new borrower. Step 3. @type tup_infos: tuple. @param tup_infos: tuple containing borrower information. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if tup_infos[5] != '': borrower_notes = {} note_time = time.strftime("%Y-%m-%d %H:%M:%S") borrower_notes[note_time] = str(tup_infos[5]) else: borrower_notes = '' db.new_borrower(None, tup_infos[0], tup_infos[1], tup_infos[2], tup_infos[3], tup_infos[4], str(borrower_notes)) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_add_new_borrower_step3(ln=ln) return page(title=_("Add new borrower") + " - III", uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) #def update_borrower_info_step1(req, ln=CFG_SITE_LANG): # """ # Update the borrower's information. # """ # # navtrail_previous_links = 'Admin Area' \ # '' % (CFG_SITE_SECURE_URL,) # # id_user = getUid(req) # (auth_code, auth_message) = is_adminuser(req) # if auth_code != 0: # return mustloginpage(req, auth_message) # # _ = gettext_set_language(ln) # # body = bc_templates.tmpl_update_borrower_info_step1(ln=ln) # # return page(title=_("Update borrower information") + " - I", # uid=id_user, # req=req, # body=body, language=ln, # navtrail=navtrail_previous_links, # lastupdated=__lastupdated__) # #def update_borrower_info_step2(req, column, string, ln=CFG_SITE_LANG): # """ # Update the borrower's information. # """ # id_user = getUid(req) # (auth_code, auth_message) = is_adminuser(req) # if auth_code != 0: # return mustloginpage(req, auth_message) # # _ = gettext_set_language(ln) # # if column == 'name': # result = db.search_borrower_by_name(string) # elif column == 'phone': # result = db.search_borrower_by_phone(string) # elif column == 'email': # result = db.search_borrower_by_email(string) # else: # result = db.search_borrower_by_id(string) # # navtrail_previous_links = 'Admin Area' \ # '' % (CFG_SITE_SECURE_URL,) # # body = bc_templates.tmpl_update_borrower_info_step2(result=result, ln=ln) # # return page(title=_("Update borrower information") + " - II", # uid=id_user, # req=req, # body=body, language=ln, # navtrail=navtrail_previous_links, # lastupdated=__lastupdated__) def update_borrower_info_step1(req, borrower_id, ln=CFG_SITE_LANG): """ Update the borrower's information. @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) borrower_details = db.get_borrower_details(borrower_id) tup_infos = (borrower_details[0], borrower_details[2], borrower_details[3], borrower_details[4], borrower_details[5], borrower_details[6]) body = bc_templates.tmpl_update_borrower_info_step1(tup_infos=tup_infos, ln=ln) return page(title=_("Update borrower information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_borrower_info_step2(req, borrower_id, name, email, phone, address, mailbox, ln=CFG_SITE_LANG): """ Update the borrower's information. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if name == '': infos.append(_("Please, insert a name")) if email == '': infos.append(_("Please, insert a valid email address")) else: borrower_email_id = db.get_borrower_id_by_email(email) if borrower_email_id is not None and borrower_id != borrower_email_id: infos.append(_("There is already a borrower using the following email:") + " %s" % (email)) tup_infos = (borrower_id, name, email, phone, address, mailbox) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) if len(infos) > 0: body = bc_templates.tmpl_update_borrower_info_step1(tup_infos=tup_infos, infos=infos, ln=ln) else: db.update_borrower_info(borrower_id, name, email, phone, address, mailbox) return redirect_to_url(req, '%s/admin2/bibcirculation/get_borrower_details?ln=%s&borrower_id=%s' \ % (CFG_SITE_SECURE_URL, ln, borrower_id)) return page(title=_("Update borrower information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_item_loans_notes(req, loan_id, add_notes, new_note, ln=CFG_SITE_LANG): """ Get loan's notes. @param loan_id: identify a loan. It is the primery key of the table crcLOAN. @param recid: identify the record. It is also the primary key of the table bibrec. @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. @param add_notes: display the textarea where will be written a new notes. @param new_notes: note that will be added to the others library's notes. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if new_note: date = '[' + time.ctime() + '] ' new_line = '\n' new_note = date + new_note + new_line db.add_new_loan_note(new_note, loan_id) loans_notes = db.get_loans_notes(loan_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_get_loans_notes(loans_notes=loans_notes, loan_id=loan_id, add_notes=add_notes, ln=ln) return page(title=_("Loan notes"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def new_item(req, isbn, ln=CFG_SITE_LANG): """ Add a new item using the ISBN. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) book_info = [] errors = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) if isbn: from xml.dom import minidom - import urllib - filexml = urllib.urlopen('http://ecs.amazonaws.com/onca/xml?' \ + filexml = BIBCIRCULATION_OPENER.open('http://ecs.amazonaws.com/onca/xml?' 
\ 'Service=AWSECommerceService&AWSAccessKeyId=' \ + CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY + \ '&Operation=ItemSearch&Condition=All&' \ 'ResponseGroup=ItemAttributes&SearchIndex=Books&' \ 'Keywords=' + isbn) xmldoc = minidom.parse(filexml) try: get_error_code = xmldoc.getElementsByTagName('Code') get_error_message = xmldoc.getElementsByTagName('Message') error_code = get_error_code.item(0).firstChild.data error_message = get_error_message.item(0).firstChild.data errors.append(str(error_code)) errors.append(str(error_message)) except AttributeError: errors = "" try: get_author = xmldoc.getElementsByTagName('Author') author = get_author.item(0).firstChild.data book_info.append(str(author)) except AttributeError: author = "" book_info.append(str(author)) try: get_ean = xmldoc.getElementsByTagName('EAN') ean = get_ean.item(0).firstChild.data book_info.append(int(ean)) except AttributeError: ean = "" book_info.append(str(ean)) try: get_isbn = xmldoc.getElementsByTagName('ISBN') short_isbn = get_isbn.item(0).firstChild.data book_info.append(str(short_isbn)) except AttributeError: short_isbn = "" book_info.append(str(short_isbn)) try: get_publisher = xmldoc.getElementsByTagName('Manufacturer') publisher = get_publisher.item(0).firstChild.data book_info.append(str(publisher)) except AttributeError: publisher = "" book_info.append(str(publisher)) try: get_nb_pages = xmldoc.getElementsByTagName('NumberOfPages') nb_pages = get_nb_pages.item(0).firstChild.data book_info.append(int(nb_pages)) except AttributeError: nb_pages = "" book_info.append(str(nb_pages)) try: get_pub_date = xmldoc.getElementsByTagName('PublicationDate') pub_date = get_pub_date.item(0).firstChild.data book_info.append(str(pub_date)) except AttributeError: pub_date = "" book_info.append(str(pub_date)) try: get_title = xmldoc.getElementsByTagName('Title') title = get_title.item(0).firstChild.data book_info.append(str(title)) except AttributeError: title = "" book_info.append(str(title)) try: get_edition = xmldoc.getElementsByTagName('Edition') edition = get_edition.item(0).firstChild.data book_info.append(str(edition)) except AttributeError: edition = "" book_info.append(str(edition)) - cover_xml = urllib.urlopen('http://ecs.amazonaws.com/onca/xml' \ + cover_xml = BIBCIRCULATION_OPENER.open('http://ecs.amazonaws.com/onca/xml' \ '?Service=AWSECommerceService&AWSAccessKeyId=' \ + CFG_BIBCIRCULATION_AMAZON_ACCESS_KEY + \ '&Operation=ItemSearch&Condition=All&' \ 'ResponseGroup=Images&SearchIndex=Books&' \ 'Keywords=' + isbn) xml_img = minidom.parse(cover_xml) try: get_cover_link = xml_img.getElementsByTagName('MediumImage') cover_link = get_cover_link.item(0).firstChild.firstChild.data book_info.append(str(cover_link)) except AttributeError: cover_link = CFG_SITE_SECURE_URL + "/img/book_cover_placeholder.gif" book_info.append(str(cover_link)) if len(errors)!=0: body = bc_templates.tmpl_new_item(errors=errors, ln=ln) else: body = bc_templates.tmpl_new_item(book_info=book_info, ln=ln) else: body = bc_templates.tmpl_new_item(ln=ln) return page(title=_("New Item"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_library_step1(req, ln=CFG_SITE_LANG): """ Add a new Library. 
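# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original module: new_item() above
# repeats one try/except AttributeError block per Amazon XML tag. The same
# behaviour, factored into a single hypothetical helper:
# ---------------------------------------------------------------------------
def _first_tag_text(xmldoc, tag, default=""):
    """Text of the first <tag> element, or `default` when absent or empty."""
    try:
        return xmldoc.getElementsByTagName(tag).item(0).firstChild.data
    except AttributeError:
        return default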
""" navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_add_new_library_step1(ln=ln) return page(title=_("Add new library"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_library_step2(req, name, email, phone, address, lib_type, notes, ln=CFG_SITE_LANG): """ Add a new Library. """ tup_infos = (name, email, phone, address, lib_type, notes) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) _ = gettext_set_language(ln) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) body = bc_templates.tmpl_add_new_library_step2(tup_infos=tup_infos, ln=ln) return page(title=_("Add new library"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_library_step3(req, name, email, phone, address, lib_type, notes, ln=CFG_SITE_LANG): """ Add a new Library. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) db.add_new_library(name, email, phone, address, lib_type, notes) body = bc_templates.tmpl_add_new_library_step3(ln=ln) return page(title=_("Add new library"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_library_info_step1(req, ln=CFG_SITE_LANG): """ Update the library's information. """ infos = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_update_library_info_step1(infos=infos, ln=ln) return page(title=_("Update library information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_library_info_step2(req, column, string, ln=CFG_SITE_LANG): """ Update the library's information. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if not string: infos = [] infos.append(_("Empty string.") + ' ' + _('Please, try again.')) body = bc_templates.tmpl_update_library_info_step1(infos=infos, ln=ln) elif string == '*': result = db.get_all_libraries() body = bc_templates.tmpl_update_library_info_step2(result=result, ln=ln) else: if column == 'name': result = db.search_library_by_name(string) else: result = db.search_library_by_email(string) body = bc_templates.tmpl_update_library_info_step2(result=result, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("Update library information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_library_info_step3(req, library_id, ln=CFG_SITE_LANG): """ Update the library's information. library_id - identify the library. It is also the primary key of the table crcLIBRARY. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) library_info = db.get_library_details(library_id) body = bc_templates.tmpl_update_library_info_step3( library_info=library_info, ln=ln) return page(title=_("Update library information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_library_info_step4(req, name, email, phone, address, lib_type, library_id, ln=CFG_SITE_LANG): """ Update the library's information. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) tup_infos = (library_id, name, email, phone, address, lib_type) body = bc_templates.tmpl_update_library_info_step4(tup_infos=tup_infos, ln=ln) return page(title=_("Update library information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_library_info_step5(req, name, email, phone, address, lib_type, library_id, ln=CFG_SITE_LANG): """ Update the library's information. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) #(library_id, name, email, phone, address) = tup_infos db.update_library_info(library_id, name, email, phone, address, lib_type) body = bc_templates.tmpl_update_library_info_step5(ln=ln) return page(title=_("Update library information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def new_book_step1(req, ln): """ Add a new book. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_new_book_step1(ln) return page(title=_("Order New Book"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def new_book_step2(req, ln): """ Add a new book. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_new_book_step2(ln) return page(title=_("Order New Book"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_copy_step1(req, ln=CFG_SITE_LANG): """ Add a new copy. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_add_new_copy_step1(ln) return page(title=_("Add new copy") + " - I", uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_copy_step2(req, p, f, ln=CFG_SITE_LANG): """ Add a new copy. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) result = perform_request_search(cc="Books", sc="1", p=p, f=f) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_add_new_copy_step2(result=result, ln=ln) return page(title=_("Add new copy") + " - II", uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_copy_step3(req, recid, barcode, ln=CFG_SITE_LANG): """ Add a new copy. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] result = db.get_item_copies_details(recid) libraries = db.get_internal_libraries() #(barcode, library, library_name, location, collection, description, # loan_period, status, expected_arrival_date, recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) if barcode is not None: if not db.barcode_in_use(barcode): barcode = None tmp_barcode = generate_tmp_barcode() body = bc_templates.tmpl_add_new_copy_step3(recid=recid, result=result, libraries=libraries, original_copy_barcode=barcode, tmp_barcode=tmp_barcode, infos=infos, ln=ln) return page(title=_("Add new copy") + " - III", uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def add_new_copy_step4(req, barcode, library, location, collection, description, loan_period, status, expected_arrival_date, recid, ln=CFG_SITE_LANG): """ Add a new copy. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) infos = [] result = db.get_item_copies_details(recid) libraries = db.get_internal_libraries() if db.barcode_in_use(barcode): infos.append(_("The given barcode %s is already in use." 
def add_new_copy_step4(req, barcode, library, location, collection,
                       description, loan_period, status,
                       expected_arrival_date, recid, ln=CFG_SITE_LANG):
    """
    Add a new copy.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    infos = []
    result = db.get_item_copies_details(recid)
    libraries = db.get_internal_libraries()

    if db.barcode_in_use(barcode):
        infos.append(_("The given barcode %s is already in use.") % barcode)
        title = _("Add new copy") + " - III"
        body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    original_copy_barcode=None,
                                                    tmp_barcode=None,
                                                    infos=infos, ln=ln)
    elif not barcode:
        infos.append(_("The given barcode is empty."))
        title = _("Add new copy") + " - III"
        body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    original_copy_barcode=None,
                                                    tmp_barcode=None,
                                                    infos=infos, ln=ln)
    elif barcode[:3] == 'tmp' \
         and status in [CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
                        CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN,
                        CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS]:
        infos.append(_("The status selected does not accept temporary "
                       "barcodes."))
        title = _("Add new copy") + " - III"
        tmp_barcode = generate_tmp_barcode()
        body = bc_templates.tmpl_add_new_copy_step3(recid=recid,
                                                    result=result,
                                                    libraries=libraries,
                                                    original_copy_barcode=None,
                                                    tmp_barcode=tmp_barcode,
                                                    infos=infos, ln=ln)
    else:
        library_name = db.get_library_name(library)
        tup_infos = (barcode, library, library_name, location, collection,
                     description, loan_period, status,
                     expected_arrival_date, recid)
        title = _("Add new copy") + " - IV"
        body = bc_templates.tmpl_add_new_copy_step4(tup_infos=tup_infos,
                                                    ln=ln)

    return page(title=title, uid=id_user, req=req, body=body,
                metaheaderadd='<link rel="stylesheet" '
                              'href="%s/img/jquery-ui.css" '
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def add_new_copy_step5(req, barcode, library, location, collection,
                       description, loan_period, status,
                       expected_arrival_date, recid, ln=CFG_SITE_LANG):
    """
    Add a new copy.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    infos = []
    if not db.barcode_in_use(barcode):
        db.add_new_copy(barcode, recid, library, collection, location,
                        description, loan_period, status,
                        expected_arrival_date)
        update_requests_statuses(barcode)
    else:
        infos.append(_("The given barcode %s is already in use.") % barcode)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_add_new_copy_step5(infos=infos, recid=recid,
                                                ln=ln)

    return page(title=_("Add new copy") + " - V", uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def update_item_info_step1(req, ln=CFG_SITE_LANG):
    """
    Update the item's information.
    """
    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    body = bc_templates.tmpl_update_item_info_step1(ln=ln)

    return page(title=_("Update item information"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def update_item_info_step2(req, p, f, ln=CFG_SITE_LANG):
    """
    Update the item's information.
    """
    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    result = perform_request_search(cc="Books", sc="1", p=p, f=f)

    body = bc_templates.tmpl_update_item_info_step2(result=result, ln=ln)

    return page(title=_("Update item information"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
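
# update_item_info_step2 (like add_new_copy_step2 above) delegates the lookup
# to the global search engine: perform_request_search(cc="Books", sc="1",
# p=p, f=f) returns the record IDs matching pattern p in field f within the
# "Books" collection, e.g. p="ellis", f="author" might yield [12, 58]
# (illustrative values).
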
""" navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) result = db.get_item_copies_details(recid) body = bc_templates.tmpl_update_item_info_step3(recid=recid, result=result, ln=ln) return page(title=_("Update item information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_item_info_step4(req, barcode, ln=CFG_SITE_LANG): """ Update the item's information. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) recid = db.get_id_bibrec(barcode) result = db.get_item_info(barcode) libraries = db.get_internal_libraries() libraries += db.get_hidden_libraries() navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) if recid == None: _ = gettext_set_language(ln) infos = [] infos.append(_("Barcode %s not found" % barcode)) return item_search(req, infos, ln) body = bc_templates.tmpl_update_item_info_step4(recid=recid, result=result, libraries=libraries, ln=ln) return page(title=_("Update item information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_item_info_step5(req, barcode, old_barcode, library, location, collection, description, loan_period, status, expected_arrival_date, recid, ln=CFG_SITE_LANG): """ Update the item's information. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) library_name = db.get_library_name(library) tup_infos = (barcode, old_barcode, library, library_name, location, collection, description, loan_period, status, expected_arrival_date, recid) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_update_item_info_step5(tup_infos=tup_infos, ln=ln) return page(title=_("Update item information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_item_info_step6(req, tup_infos, ln=CFG_SITE_LANG): """ Update the item's information. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] # tuple containing information for the update process. (barcode, old_barcode, library_id, location, collection, description, loan_period, status, expected_arrival_date, recid) = tup_infos is_on_loan = db.is_on_loan(old_barcode) #is_requested = db.is_requested(old_barcode) # if item on loan and new status is CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, # item has to be returned. if is_on_loan and status == CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF: db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, old_barcode) db.return_loan(old_barcode) # update item information. 
def update_item_info_step6(req, tup_infos, ln=CFG_SITE_LANG):
    """
    Update the item's information.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    infos = []

    # tuple containing information for the update process.
    (barcode, old_barcode, library_id, location, collection, description,
     loan_period, status, expected_arrival_date, recid) = tup_infos

    is_on_loan = db.is_on_loan(old_barcode)
    #is_requested = db.is_requested(old_barcode)

    # if the item is on loan and the new status is
    # CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, the item has to be returned.
    if is_on_loan and status == CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF:
        db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF,
                              old_barcode)
        db.return_loan(old_barcode)

    # update item information.
    db.update_item_info(old_barcode, library_id, collection, location,
                        description, loan_period, status,
                        expected_arrival_date)
    update_requests_statuses(old_barcode)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    if barcode != old_barcode:
        if db.barcode_in_use(barcode):
            infos.append(_("Item [%s] updated, but the barcode was not "
                           "modified because it is already in use.")
                         % (old_barcode))
        else:
            if db.update_barcode(old_barcode, barcode):
                infos.append(_("Item [%s] successfully updated to [%s].")
                             % (old_barcode, barcode))
            else:
                infos.append(_("Item [%s] updated, but the barcode was not "
                               "modified because it was not found (!?).")
                             % (old_barcode))
    else:
        infos.append(_("Item [%s] successfully updated.") % old_barcode)

    copies = db.get_item_copies_details(recid)
    requests = db.get_item_requests(recid)
    loans = db.get_item_loans(recid)

    req_hist_overview = db.get_item_requests_historical_overview(recid)
    loans_hist_overview = db.get_item_loans_historical_overview(recid)

    body = bc_templates.tmpl_get_item_details(recid=recid, copies=copies,
                                    requests=requests, loans=loans,
                                    req_hist_overview=req_hist_overview,
                                    loans_hist_overview=loans_hist_overview,
                                    infos=infos, ln=ln)

    return page(title=_("Update item information"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def search_library_step1(req, infos=[], ln=CFG_SITE_LANG):
    """
    Display the form where we can search a library (by name or email).
    """
    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    body = bc_templates.tmpl_search_library_step1(infos=infos, ln=ln)

    return page(title=_("Search library"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def search_library_step2(req, column, string, ln=CFG_SITE_LANG):
    """
    Search a library and return a list with all the possible results, using
    the parameters received from the previous step.

    column - identify the column of the table crcLIBRARY that will be
             considered during the search. Can be 'name' or 'email'.

    string - pattern used for the search process.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if not string:
        infos = []
        infos.append(_("Empty string.") + ' ' + _('Please, try again.'))
        body = bc_templates.tmpl_search_library_step1(infos=infos, ln=ln)
    elif string == '*':
        result = db.get_all_libraries()
        body = bc_templates.tmpl_search_library_step2(result=result, ln=ln)
    else:
        if column == 'name':
            result = db.search_library_by_name(string)
        else:
            result = db.search_library_by_email(string)
        body = bc_templates.tmpl_search_library_step2(result=result, ln=ln)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
        'Admin Area</a> &gt; <a class="navtrail" ' \
        'href="%s/admin2/bibcirculation/loan_on_desk_step1?ln=%s">' \
        'Circulation Management</a> ' % (CFG_SITE_SECURE_URL,
                                         CFG_SITE_SECURE_URL, ln)

    return page(title=_("Search library"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def get_library_notes(req, library_id, delete_key, library_notes,
                      ln=CFG_SITE_LANG):
    """
    Retrieve notes related to a library.

    library_id - identify the library. It is also the primary key of
                 the table crcLIBRARY.
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if delete_key and library_id: if looks_like_dictionary(db.get_library_notes(library_id)): lib_notes = eval(db.get_library_notes(library_id)) del lib_notes[delete_key] db.update_library_notes(library_id, lib_notes) elif library_notes: if db.get_library_notes(library_id): if looks_like_dictionary(db.get_library_notes(library_id)): lib_notes = eval(db.get_library_notes(library_id)) else: lib_notes = {} else: lib_notes = {} lib_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = str(library_notes) db.update_library_notes(library_id, lib_notes) lib_notes = db.get_library_notes(library_id) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) body = bc_templates.tmpl_library_notes(library_notes=lib_notes, library_id=library_id, ln=ln) return page(title=_("Library notes"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def change_due_date_step1(req, barcode, borrower_id, ln=CFG_SITE_LANG): """ Change the due date of a loan, step1. loan_id: identify a loan. It is the primery key of the table crcLOAN. borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) loan_id = db.get_current_loan_id(barcode) loan_details = db.get_loan_infos(loan_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_change_due_date_step1(loan_details=loan_details, loan_id=loan_id, borrower_id=borrower_id, ln=ln) return page(title=_("Change due date"), uid=id_user, req=req, body=body, language=ln, #metaheaderadd = '' % CFG_SITE_SECURE_URL, metaheaderadd = '' % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def change_due_date_step2(req, new_due_date, loan_id, borrower_id, ln=CFG_SITE_LANG): """ Change the due date of a loan, step2. due_date: new due date. loan_id: identify a loan. It is the primery key of the table crcLOAN. borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) db.update_due_date(loan_id, new_due_date) update_status_if_expired(loan_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_change_due_date_step2(new_due_date=new_due_date, borrower_id=borrower_id, ln=ln) return page(title=_("Change due date"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def claim_book_return(req, borrower_id, recid, loan_id, template, ln=CFG_SITE_LANG): """ Claim the return of an item. borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. recid: identify the record. It is also the primary key of the table bibrec. template: letter template. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) email_body = generate_email_body(load_template(template), loan_id) email = db.get_borrower_email(borrower_id) subject = book_title_from_MARC(int(recid)) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_borrower_notification(email=email, subject=subject, email_body=email_body, borrower_id=borrower_id, ln=ln) return page(title=_("Claim return"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def create_new_loan_step1(req, borrower_id, ln=CFG_SITE_LANG): """ Create a new loan from the borrower's page, step1. borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] borrower = db.get_borrower_details(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower, infos=infos, ln=ln) return page(title=_("New loan"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def create_new_loan_step2(req, borrower_id, barcode, notes, ln=CFG_SITE_LANG): """ Create a new loan from the borrower's page, step2. borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. barcode: identify the item. It is the primary key of the table crcITEM. notes: notes about the new loan. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) #borrower_info = db.get_borrower_data(borrower_id) has_recid = db.get_id_bibrec(barcode) loan_id = db.is_item_on_loan(barcode) if notes: notes_format = '[' + time.ctime() + '] ' + notes + '\n' else: notes_format = '' infos = [] if has_recid is None: infos.append(_('%(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s Unknown barcode.') % {'x_barcode': barcode, 'x_strong_tag_open': '', 'x_strong_tag_close': ''} + ' ' + _('Please, try again.')) borrower = db.get_borrower_details(borrower_id) title = _("New loan") body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower, infos=infos, ln=ln) elif loan_id: infos.append(_('The item with the barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is on loan.') % {'x_barcode': barcode, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) borrower = db.get_borrower_details(borrower_id) title = _("New loan") body = bc_templates.tmpl_create_new_loan_step1(borrower=borrower, infos=infos, ln=ln) else: #loaned_on = datetime.date.today() due_date = renew_loan_for_X_days(barcode) db.new_loan(borrower_id, has_recid, barcode, due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, 'normal', notes_format) tag_all_requests_as_done(barcode, borrower_id) db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode) update_requests_statuses(barcode) result = db.get_all_loans(20) title = _("Current loans") infos.append(_('A new loan has been registered with success.')) body = bc_templates.tmpl_all_loans(result=result, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=title, uid=id_user, req=req, body=body, language=ln, 
def create_new_request_step1(req, borrower_id, p="", f="", search=None,
                             ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step1.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.

    p: search pattern.

    f: search field.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    infos = []

    borrower = db.get_borrower_details(borrower_id)

    if search and p == '':
        infos.append(_('Empty string.') + ' ' + _('Please, try again.'))
        result = ''
    elif search and f == 'barcode':
        p = p.strip('\'" \t')
        has_recid = db.get_recid(p)

        if has_recid is None:
            infos.append(_('The barcode %(x_strong_tag_open)s%(x_barcode)s'
                           '%(x_strong_tag_close)s does not exist in the '
                           'BibCirculation database.')
                         % {'x_barcode': p,
                            'x_strong_tag_open': '<strong>',
                            'x_strong_tag_close': '</strong>'})
            result = ''
        else:
            result = has_recid
    elif search:
        result = perform_request_search(cc="Books", sc="1", p=p, f=f)
    else:
        result = ''

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    if isinstance(result, (types.IntType, types.LongType)):
        recid = result
        holdings_information = db.get_holdings_information(recid)
        user_info = db.get_borrower_details(borrower_id)
        body = bc_templates.tmpl_create_new_request_step2(user_info=user_info,
                                holdings_information=holdings_information,
                                recid=recid, ln=ln)
    else:
        body = bc_templates.tmpl_create_new_request_step1(borrower=borrower,
                                                          infos=infos,
                                                          result=result,
                                                          p=p, f=f, ln=ln)

    return page(title=_("New request"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def create_new_request_step2(req, recid, borrower_id, ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step2.

    recid: identify the record. It is also the primary key of
           the table bibrec.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    holdings_information = db.get_holdings_information(recid)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    user_info = db.get_borrower_details(borrower_id)

    body = bc_templates.tmpl_create_new_request_step2(user_info=user_info,
                            holdings_information=holdings_information,
                            recid=recid, ln=ln)

    return page(title=_("New request"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def create_new_request_step3(req, borrower_id, barcode, recid,
                             ln=CFG_SITE_LANG):
    """
    Create a new request from the borrower's page, step3.

    borrower_id: identify the borrower. It is also the primary key of
                 the table crcBORROWER.

    barcode: identify the item. It is the primary key of the table
             crcITEM.

    recid: identify the record. It is also the primary key of
           the table bibrec.
""" navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) item_info = db.get_item_info(barcode) if item_info[6] == 'Reference': body = bc_templates.tmpl_book_not_for_loan(ln=ln) else: body = bc_templates.tmpl_create_new_request_step3( borrower_id=borrower_id, barcode=barcode, recid=recid, ln=ln) return page(title=_("New request"), uid=id_user, req=req, body=body, metaheaderadd = "" % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def create_new_request_step4(req, period_from, period_to, barcode, borrower_id, recid, ln=CFG_SITE_LANG): """ Create a new request from the borrower's page, step4. period_from: begining of the period of interest. period_to: end of the period of interest. barcode: identify the item. It is the primary key of the table crcITEM. borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. recid: identify the record. It is also the primary key of the table bibrec. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) db.new_hold_request(borrower_id, recid, barcode, period_from, period_to, CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING) update_requests_statuses(barcode) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_create_new_request_step4(ln=ln) return page(title=_("New request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def place_new_request_step1(req, barcode, recid, key, string, ln=CFG_SITE_LANG): """ Place a new request from the item's page, step1. barcode: identify the item. It is the primary key of the table crcITEM. recid: identify the record. It is also the primary key of the table bibrec. key: search field. string: search pattern. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) recid = db.get_id_bibrec(barcode) infos = [] if key and not string: infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_place_new_request_step1(result=None, key=key, string=string, barcode=barcode, recid=recid, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("New request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) result = search_user(key, string) borrowers_list = [] if len(result) == 0 and key: if CFG_CERN_SITE: infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID.")) else: new_borrower_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/add_new_borrower_step1', {'ln': ln}, _("Register new borrower.")) message = _("0 borrowers found.") + ' ' + new_borrower_link infos.append(message) else: for user in result: borrower_data = db.get_borrower_data_by_id(user[0]) borrowers_list.append(borrower_data) if len(result) == 1: return place_new_request_step2(req, barcode, recid, borrowers_list[0], ln) else: body = bc_templates.tmpl_place_new_request_step1(result=borrowers_list, key=key, string=string, barcode=barcode, recid=recid, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("New request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def place_new_request_step2(req, barcode, recid, user_info, ln=CFG_SITE_LANG): """ Place a new request from the item's page, step2. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type user_info: list. @param user_info: information of the user/borrower who was selected. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] body = bc_templates.tmpl_place_new_request_step2(barcode=barcode, recid=recid, user_info=user_info, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("New request"), uid=id_user, req=req, body=body, metaheaderadd = "" % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def place_new_request_step3(req, barcode, recid, user_info, period_from, period_to, ln=CFG_SITE_LANG): """ Place a new request from the item's page, step3. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @return: new request. 
""" navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) (_id, ccid, name, email, phone, address, mailbox) = user_info # validate the period of interest given by the admin if validate_date_format(period_from) is False: infos = [] infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_from, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_place_new_request_step2(barcode=barcode, recid=recid, user_info=user_info, infos=infos, ln=ln) return page(title=_("New request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) elif validate_date_format(period_to) is False: infos = [] infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_to, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_place_new_request_step2(barcode=barcode, recid=recid, user_info=user_info, infos=infos, ln=ln) # Register request borrower_id = db.get_borrower_id_by_email(email) if borrower_id == None: db.new_borrower(ccid, name, email, phone, address, mailbox, '') borrower_id = db.get_borrower_id_by_email(email) req_id = db.new_hold_request(borrower_id, recid, barcode, period_from, period_to, CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING) pending_request = update_requests_statuses(barcode) if req_id == pending_request: (title, year, author, isbn, publisher) = book_information_from_MARC(int(recid)) details = db.get_loan_request_details(req_id) if details: library = details[3] location = details[4] request_date = details[7] else: location = '' library = '' request_date = '' link_to_holdings_details = create_html_link(CFG_SITE_SECURE_URL + '/record/%s/holdings'%str(recid), {'ln': ln}, (CFG_SITE_SECURE_URL + '/record/%s/holdings'%str(recid))) subject = _('New request') message = load_template('notification') message = message % (name, ccid, email, address, mailbox, title, author, publisher, year, isbn, location, library, link_to_holdings_details, request_date) send_email(fromaddr = CFG_BIBCIRCULATION_LIBRARIAN_EMAIL, toaddr = CFG_BIBCIRCULATION_LOANS_EMAIL, subject = subject, content = message, header = '', footer = '', attempt_times=1, attempt_sleeptime=10 ) send_email(fromaddr = CFG_BIBCIRCULATION_LIBRARIAN_EMAIL, toaddr = email, subject = subject, content = message, header = '', footer = '', attempt_times=1, attempt_sleeptime=10 ) body = bc_templates.tmpl_place_new_request_step3(ln=ln) return page(title=_("New request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def place_new_loan_step1(req, barcode, recid, key, string, ln=CFG_SITE_LANG): """ Place a new loan from the item's page, step1. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type key: string. @param key: search field. @type string: string. @param string: search pattern. @return: list of users/borrowers. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) recid = db.get_id_bibrec(barcode) infos = [] if key and not string: infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_place_new_loan_step1(result=None, key=key, string=string, barcode=barcode, recid=recid, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) return page(title=_("New loan"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) result = search_user(key, string) borrowers_list = [] if len(result) == 0 and key: if CFG_CERN_SITE: infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID.")) else: new_borrower_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/add_new_borrower_step1', {'ln': ln}, _("Register new borrower.")) message = _("0 borrowers found.") + ' ' + new_borrower_link infos.append(message) else: for user in result: borrower_data = db.get_borrower_data_by_id(user[0]) borrowers_list.append(borrower_data) body = bc_templates.tmpl_place_new_loan_step1(result=borrowers_list, key=key, string=string, barcode=barcode, recid=recid, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("New loan"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def place_new_loan_step2(req, barcode, recid, user_info, ln=CFG_SITE_LANG): """ Place a new loan from the item's page, step2. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type user_info: list. @param user_info: information of the user/borrower who was selected. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_place_new_loan_step2(barcode=barcode, recid=recid, user_info=user_info, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("New loan"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def place_new_loan_step3(req, barcode, recid, ccid, name, email, phone, address, mailbox, due_date, notes, ln=CFG_SITE_LANG): """ Place a new loan from the item's page, step3. @type barcode: string. @param barcode: identify the item. It is the primary key of the table crcITEM. @type recid: integer. @param recid: identify the record. It is also the primary key of the table bibrec. @type name: string. @type email: string. @type phone: string. @type address: string. @type mailbos: string. @type due_date: string. @type notes: string. @return: new loan. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if notes: notes_format = '[' + time.ctime() + '] ' + notes + '\n' else: notes_format = '' #loaned_on = datetime.date.today() borrower_id = db.get_borrower_id_by_email(email) borrower_info = db.get_borrower_data(borrower_id) if db.is_on_loan(barcode): infos.append(_("Item with barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s is already on loan.") % {'x_barcode': barcode, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) copies = db.get_item_copies_details(recid) requests = db.get_item_requests(recid) loans = db.get_item_loans(recid) req_hist_overview = db.get_item_requests_historical_overview(recid) loans_hist_overview = db.get_item_loans_historical_overview(recid) title = _("Item details") body = bc_templates.tmpl_get_item_details( recid=recid, copies=copies, requests=requests, loans=loans, req_hist_overview=req_hist_overview, loans_hist_overview=loans_hist_overview, infos=infos, ln=ln) elif borrower_id != 0: db.new_loan(borrower_id, recid, barcode, due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, 'normal', notes_format) tag_all_requests_as_done(barcode, borrower_id) db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode) update_requests_statuses(barcode) title = _("New loan") body = bc_templates.tmpl_register_new_loan(borrower_info=borrower_info, infos=infos, recid=recid, ln=ln) else: db.new_borrower(ccid, name, email, phone, address, mailbox, '') borrower_id = db.get_borrower_id_by_email(email) db.new_loan(borrower_id, recid, barcode, due_date, CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, 'normal', notes_format) tag_all_requests_as_done(barcode, borrower_id) db.update_item_status(CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, barcode) update_requests_statuses(barcode) title = _("New loan") body = bc_templates.tmpl_register_new_loan(borrower_info=borrower_info, infos=infos, recid=recid, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return page(title=title, uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def order_new_copy_step1(req, recid, ln): """ Order a new copy. Step 1. """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) list_of_vendors = db.get_list_of_vendors() libraries = db.get_internal_libraries() body = bc_templates.tmpl_order_new_copy_step1(recid=recid, list_of_vendors=list_of_vendors, libraries=libraries, ln=ln) return page(title=_("Order new copy"), uid=id_user, req=req, metaheaderadd = """""" % CFG_SITE_SECURE_URL, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def order_new_copy_step2 (req, recid, barcode, vendor_id, cost, currency, status, order_date, expected_date, library_id, notes, ln): """ Order a new copy. Step 2. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) order_info = (recid, barcode, vendor_id, cost, currency, status, order_date, expected_date, library_id, notes) body = bc_templates.tmpl_order_new_copy_step2(order_info=order_info, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("Order new copy"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def order_new_copy_step3(req, order_info, ln): """ Order a new copy. Step 3. """ #id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) (recid, _barcode, vendor_id, cost, currency, status, order_date, expected_date, _library_id, notes) = order_info cost_format = cost + ' ' + currency purchase_notes = {time.strftime("%Y-%m-%d %H:%M:%S"): notes} db.order_new_copy(recid, vendor_id, order_date, cost_format, status, str(purchase_notes), expected_date) return get_item_details(req, recid, ln) def list_ordered_books(req, ln): """ Return the list with all ordered books. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) ordered_books = db.get_ordered_books() body = bc_templates.tmpl_ordered_books(ordered_books=ordered_books, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("List of ordered books"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_purchase_notes(req, purchase_id, delete_key, library_notes, ln=CFG_SITE_LANG): """ Retrieve notes related with a library. purchase_id - identify the purchase. It is also the primary key of the table crcPURCHASE. @param add_notes: display the textarea where will be written a new notes. @param new_notes: note that will be added to the others library's notes. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if delete_key and purchase_id: if looks_like_dictionary(db.get_purchase_notes(purchase_id)): purchase_notes = eval(db.get_purchase_notes(purchase_id)) del purchase_notes[delete_key] db.update_purchase_notes(purchase_id, purchase_notes) elif library_notes: if db.get_purchase_notes(purchase_id): if looks_like_dictionary(db.get_purchase_notes(purchase_id)): purchase_notes = eval(db.get_purchase_notes(purchase_id)) else: purchase_notes = {} else: purchase_notes = {} purchase_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = str(library_notes) db.update_purchase_notes(purchase_id, purchase_notes) purchase_notes = db.get_purchase_notes(purchase_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_purchase_notes(purchase_notes=purchase_notes, purchase_id=purchase_id, ln=ln) return page(title=_("Purchase notes"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_step0(req, recid, key, string, ln=CFG_SITE_LANG): """ Place a new request from the item's page, step1. barcode: identify the item. It is the primary key of the table crcITEM. recid: identify the record. It is also the primary key of the table bibrec. key: search field. string: search pattern. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if string == '': message = _('Empty string.') + ' ' + _('Please, try again.') return borrower_search(req, message, False, ln) else: result = search_user(key, string) infos = [] if not key or (key and not string): if key and not string: infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_register_ill_request_step0(result=None, infos=infos, key=key, string=string, recid=recid, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) if CFG_CERN_SITE == 1: if key == 'name': result = db.search_borrower_by_name(string) elif key == 'email': result = db.search_borrower_by_email(string) else: result = db.search_borrower_by_ccid(int(string)) if result == (): ldap_info = get_user_info_from_ldap(ccid=string) if len(ldap_info) == 0: result = () else: if ldap_info == 'busy': message = bc_templates.tmpl_message_sever_busy(ln) return borrower_search(req, message, False, ln) else: try: name = ldap_info['cn'][0] except KeyError: name = "" try: email = ldap_info['mail'][0] except KeyError: email = "" try: phone = ldap_info['telephoneNumber'][0] except KeyError: phone = "" try: address = ldap_info['physicalDeliveryOfficeName'][0] except KeyError: address = "" try: mailbox = ldap_info['postOfficeBox'][0] except KeyError: mailbox = "" try: ccid = ldap_info['employeeID'][0] except KeyError: ccid = "" db.new_borrower(ccid, name, email, phone, address, mailbox, '') result = db.search_borrower_by_ccid(int(ccid)) else: if key == 'name': result = db.search_borrower_by_name(string) elif key == 'email': result = db.search_borrower_by_email(string) else: result = db.search_borrower_by_id(string) list_infos = [] for (borrower_id, ccid, name, email, phone, address, mailbox) in result: tup = (borrower_id, ccid, name, email, phone, address, mailbox) list_infos.append(tup) body = bc_templates.tmpl_register_ill_request_step0(result=list_infos, infos=infos, key=key, string=string, recid=recid, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_step1(req, recid, user_info, ln=CFG_SITE_LANG): """ Register a new ILL request. Step 1. 
""" navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_register_ill_request_step1(recid=recid, user_info=user_info, ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_step2(req, recid, user_info, period_of_interest_from, period_of_interest_to, notes, only_edition, ln=CFG_SITE_LANG): navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) request_info = (recid, period_of_interest_from, period_of_interest_to, notes, only_edition) body = bc_templates.tmpl_register_ill_request_step2(user_info=user_info, request_info=request_info, ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_step3(req, borrower_id, request_info, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) (recid, period_of_interest_from, period_of_interest_to, notes, only_edition) = request_info book_info = {'recid': recid} if notes: library_notes = {} library_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = str(notes) else: library_notes = {} db.ill_register_request_on_desk(borrower_id, book_info, period_of_interest_from, period_of_interest_to, CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING, str(library_notes), only_edition or 'No', 'book') navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_register_ill_request_step3(ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def list_ill_request(req, status, ln=CFG_SITE_LANG): navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) ill_req = db.get_ill_requests(status) body = bc_templates.tmpl_list_ill_request(ill_req=ill_req, ln=ln) return page(title=_("List of ILL requests"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def list_acquisition(req, status, ln=CFG_SITE_LANG): navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) acquisitions = db.get_acquisitions(status) body = bc_templates.tmpl_list_acquisition(ill_req=acquisitions, ln=ln) return page(title=_("List of acquisitions"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def ill_request_details_step1(req, delete_key, ill_request_id, new_status, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if delete_key and ill_request_id: if 

    if delete_key and ill_request_id:
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_notes = eval(db.get_ill_request_notes(ill_request_id))
            del library_notes[delete_key]
            db.update_ill_request_notes(ill_request_id, library_notes)

    if new_status:
        db.update_ill_request_status(ill_request_id, new_status)

    ill_request_borrower_details = \
                        db.get_ill_request_borrower_details(ill_request_id)

    if ill_request_borrower_details is None \
       or len(ill_request_borrower_details) == 0:
        infos.append(_("Borrower request details not found."))

    ill_request_details = db.get_ill_request_details(ill_request_id)
    if ill_request_details is None or len(ill_request_details) == 0:
        infos.append(_("Request not found."))

    libraries = db.get_external_libraries()

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    title = _("ILL request details")
    if infos == []:
        body = bc_templates.tmpl_ill_request_details_step1(
                    ill_request_id=ill_request_id,
                    ill_request_details=ill_request_details,
                    libraries=libraries,
                    ill_request_borrower_details=ill_request_borrower_details,
                    ln=ln)
    else:
        body = bc_templates.tmpl_display_infos(infos, ln)

    return page(title=title, uid=id_user, req=req,
                metaheaderadd='<link rel="stylesheet" '
                              'href="%s/img/jquery-ui.css" '
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def acq_details_step1(req, delete_key, ill_request_id, new_status,
                      ln=CFG_SITE_LANG):
    """
    Display and update the details of an acquisition request.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    infos = []

    if delete_key and ill_request_id:
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_notes = eval(db.get_ill_request_notes(ill_request_id))
            del library_notes[delete_key]
            db.update_ill_request_notes(ill_request_id, library_notes)

    if new_status:
        db.update_ill_request_status(ill_request_id, new_status)

    ill_request_borrower_details = \
                        db.get_acq_request_borrower_details(ill_request_id)

    if ill_request_borrower_details is None \
       or len(ill_request_borrower_details) == 0:
        infos.append(_("Borrower request details not found."))

    ill_request_details = db.get_ill_request_details(ill_request_id)
    if ill_request_details is None or len(ill_request_details) == 0:
        infos.append(_("Request not found."))

    vendors = db.get_all_vendors()

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    title = _("Acquisition details")
    if infos == []:
        body = bc_templates.tmpl_acq_details_step1(
                    ill_request_id=ill_request_id,
                    ill_request_details=ill_request_details,
                    libraries=vendors,
                    ill_request_borrower_details=ill_request_borrower_details,
                    ln=ln)
    else:
        body = bc_templates.tmpl_display_infos(infos, ln)

    return page(title=title, uid=id_user, req=req,
                metaheaderadd='<link rel="stylesheet" '
                              'href="%s/img/jquery-ui.css" '
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def ill_request_details_step2(req, delete_key, ill_request_id, new_status,
                              library_id, request_date, expected_date,
                              arrival_date, due_date, return_date, cost,
                              _currency, barcode, library_notes, book_info,
                              article_info, ln=CFG_SITE_LANG):
    """
    Update the details of an ILL request and go back to the request list.
    """
    #id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if delete_key and ill_request_id:
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_previous_notes = \
                            eval(db.get_ill_request_notes(ill_request_id))
            del library_previous_notes[delete_key]
            db.update_ill_request_notes(ill_request_id,
                                        library_previous_notes)

    #navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
    #                          'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    if db.get_ill_request_notes(ill_request_id):
        if looks_like_dictionary(db.get_ill_request_notes(ill_request_id)):
            library_previous_notes = \
                            eval(db.get_ill_request_notes(ill_request_id))
        else:
            library_previous_notes = {}
    else:
        library_previous_notes = {}

    if library_notes:
        library_previous_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \
                                                        str(library_notes)

    if new_status == CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED:
        borrower_id = db.get_ill_borrower(ill_request_id)
        barcode = db.get_ill_barcode(ill_request_id)
        db.update_ill_loan_status(borrower_id, barcode, return_date, 'ill')

    db.update_ill_request(ill_request_id, library_id, request_date,
                          expected_date, arrival_date, due_date,
                          return_date, new_status, cost, barcode,
                          str(library_previous_notes))

    request_type = db.get_ill_request_type(ill_request_id)
    if request_type == 'book':
        item_info = book_info
    else:
        item_info = article_info
    db.update_ill_request_item_info(ill_request_id, item_info)

    return list_ill_request(req, new_status, ln)


def acq_details_step2(req, delete_key, ill_request_id, new_status,
                      library_id, request_date, expected_date, arrival_date,
                      due_date, return_date, cost, budget_code,
                      library_notes, item_info, ln=CFG_SITE_LANG):
    """
    Update the details of an acquisition request and redirect back to the
    acquisition list.
    """
    #id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if delete_key and ill_request_id:
        library_previous_notes = \
                            eval(db.get_ill_request_notes(ill_request_id))
        del library_previous_notes[delete_key]
        db.update_ill_request_notes(ill_request_id, library_previous_notes)

    if db.get_ill_request_notes(ill_request_id):
        library_previous_notes = \
                            eval(db.get_ill_request_notes(ill_request_id))
    else:
        library_previous_notes = {}

    if library_notes:
        library_previous_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \
                                                        str(library_notes)

    #if new_status == CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED:
    #    borrower_id = db.get_ill_borrower(ill_request_id)

    db.update_acq_request(ill_request_id, library_id, request_date,
                          expected_date, arrival_date, due_date,
                          return_date, new_status, cost, budget_code,
                          str(library_previous_notes))

    #request_type = db.get_ill_request_type(ill_request_id)
    db.update_ill_request_item_info(ill_request_id, item_info)

    return redirect_to_url(req,
            '%s/admin2/bibcirculation/list_acquisition?ln=%s&status=%s' % \
            (CFG_SITE_SECURE_URL, ln, new_status))


def ordered_books_details_step1(req, purchase_id, delete_key,
                                ln=CFG_SITE_LANG):
    """
    Display the details of an ordered book.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if delete_key and purchase_id:
        if looks_like_dictionary(db.get_purchase_notes(purchase_id)):
            purchase_notes = eval(db.get_purchase_notes(purchase_id))
            del purchase_notes[delete_key]
            db.update_purchase_notes(purchase_id, purchase_notes)

    list_of_vendors = db.get_list_of_vendors()
    order_details = db.get_order_details(purchase_id)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_ordered_book_details_step1(
                                        order_details=order_details,
                                        list_of_vendors=list_of_vendors,
                                        ln=ln)

    return page(title=_("Ordered book details"), uid=id_user, req=req,
                metaheaderadd='<link rel="stylesheet" '
                              'href="%s/img/jquery-ui.css" '
                              'type="text/css" />' % CFG_SITE_SECURE_URL,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)
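
# Note the two ways the update handlers above hand control back to a list
# view: ill_request_details_step2 calls list_ill_request() directly and
# returns its page, while acq_details_step2 issues an HTTP redirect to the
# list_acquisition URL, so reloading the browser after the redirect does not
# repeat the database update.
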
def ordered_books_details_step2(req, purchase_id, recid, vendor_id, cost,
                                currency, status, order_date, expected_date,
                                purchase_notes, library_notes,
                                ln=CFG_SITE_LANG):
    """
    Edit the details of an ordered book.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    order_details = (purchase_id, recid, vendor_id, cost, currency, status,
                     order_date, expected_date, purchase_notes,
                     library_notes)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_ordered_book_details_step2(
                                            order_details=order_details,
                                            ln=ln)

    return page(title=_("Ordered book details"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def ordered_books_details_step3(req, purchase_id, recid, vendor_id, cost,
                                currency, status, order_date, expected_date,
                                purchase_notes, library_notes,
                                ln=CFG_SITE_LANG):
    """
    Update the details of an ordered book.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    if looks_like_dictionary(purchase_notes):
        purchase_notes = eval(purchase_notes)
    else:
        purchase_notes = {}

    library_notes = library_notes.strip(' \n\t')
    if len(library_notes) != 0:
        purchase_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \
                                                        str(library_notes)

    cost_format = cost + ' ' + currency

    db.update_purchase(purchase_id, recid, vendor_id, cost_format, status,
                       order_date, expected_date, str(purchase_notes))

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    #body = bc_templates.tmpl_ordered_book_details_step3(ln=ln)
    body = list_ordered_books(req, ln)

    return page(title=_("Ordered book details"), uid=id_user, req=req,
                body=body, language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def add_new_vendor_step1(req, ln=CFG_SITE_LANG):
    """
    Add a new vendor.
    """
    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    body = bc_templates.tmpl_add_new_vendor_step1(ln=ln)

    return page(title=_("Add new vendor"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def add_new_vendor_step2(req, name, email, phone, address, notes,
                         ln=CFG_SITE_LANG):
    """
    Add a new vendor.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    tup_infos = (name, email, phone, address, notes)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_add_new_vendor_step2(tup_infos=tup_infos,
                                                  ln=ln)

    return page(title=_("Add new vendor"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def add_new_vendor_step3(req, name, email, phone, address, notes,
                         ln=CFG_SITE_LANG):
    """
    Add a new vendor.
    """
    id_user = getUid(req)
    (auth_code, auth_message) = is_adminuser(req)
    if auth_code != 0:
        return mustloginpage(req, auth_message)

    _ = gettext_set_language(ln)

    db.add_new_vendor(name, email, phone, address, notes)

    navtrail_previous_links = '<a class="navtrail" href="%s/help/admin">' \
                              'Admin Area</a>' % (CFG_SITE_SECURE_URL,)

    body = bc_templates.tmpl_add_new_vendor_step3(ln=ln)

    return page(title=_("Add new vendor"), uid=id_user, req=req, body=body,
                language=ln, navtrail=navtrail_previous_links,
                lastupdated=__lastupdated__)


def update_vendor_info_step1(req, ln=CFG_SITE_LANG):
    """
    Update the vendor's information.
""" infos = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) _ = gettext_set_language(ln) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) body = bc_templates.tmpl_update_vendor_info_step1(infos=infos, ln=ln) return page(title=_("Update vendor information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_vendor_info_step2(req, column, string, ln=CFG_SITE_LANG): """ Update the vendor's information. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if not string: infos = [] infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_update_vendor_info_step1(infos=infos, ln=ln) elif string == '*': result = db.get_all_vendors() body = bc_templates.tmpl_update_vendor_info_step2(result=result, ln=ln) else: if column == 'name': result = db.search_vendor_by_name(string) else: result = db.search_vendor_by_email(string) body = bc_templates.tmpl_update_vendor_info_step2(result=result, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_update_vendor_info_step2(result=result, ln=ln) return page(title=_("Update vendor information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_vendor_info_step3(req, vendor_id, ln=CFG_SITE_LANG): """ Update the library's information. vendor_id - identify the vendor. It is also the primary key of the table crcVENDOR. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) vendor_info = db.get_vendor_details(vendor_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_update_vendor_info_step3(vendor_info=vendor_info, ln=ln) return page(title=_("Update vendor information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_vendor_info_step4(req, name, email, phone, address, vendor_id, ln=CFG_SITE_LANG): """ Update the vendor's information. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) tup_infos = (vendor_id, name, email, phone, address) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_update_vendor_info_step4(tup_infos=tup_infos, ln=ln) return page(title=_("Update vendor information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def update_vendor_info_step5(req, name, email, phone, address, vendor_id, ln=CFG_SITE_LANG): """ Update the library's information. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) db.update_vendor_info(vendor_id, name, email, phone, address) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_update_vendor_info_step5(ln=ln) return page(title=_("Update vendor information"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def search_vendor_step1(req, ln=CFG_SITE_LANG): """ Display the form where we can search a vendor (by name or email). """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_search_vendor_step1(infos=infos, ln=ln) return page(title=_("Search vendor"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def search_vendor_step2(req, column, string, ln=CFG_SITE_LANG): """ Search a vendor and return a list with all the possible results, using the parameters received from the previous step. column - identify the column, of the table crcVENDOR, that will be considered during the search. Can be 'name' or 'email'. str - string used for the search process. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if not string: infos = [] infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_search_vendor_step1(infos=infos, ln=ln) elif string == '*': result = db.get_all_vendors() body = bc_templates.tmpl_search_vendor_step2(result=result, ln=ln) else: if column == 'name': result = db.search_vendor_by_name(string) else: result = db.search_vendor_by_email(string) body = bc_templates.tmpl_search_vendor_step2(result=result, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=_("Search vendor"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_vendor_details(req, vendor_id, ln=CFG_SITE_LANG): """ Display the details of a vendor. @type vendor_id: integer. @param vendor_id: identify the vendor. It is also the primary key of the table crcVENDOR. @return: vendor details. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) vendor_details = db.get_vendor_details(vendor_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_vendor_details(vendor_details=vendor_details, ln=ln) return page(title=_("Vendor details"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_vendor_notes(req, vendor_id, add_notes, new_note, ln=CFG_SITE_LANG): """ Retrieve notes related with a vendor. vendor_id - identify the vendor. It is also the primary key of the table crcVENDOR. @param add_notes: display the textarea where will be written a new notes. @param new_notes: note that will be added to the others vendor's notes. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if new_note: date = '[' + time.ctime() + '] ' new_line = '\n' new_note = date + new_note + new_line db.add_new_vendor_note(new_note, vendor_id) vendor_notes = db.get_vendor_notes(vendor_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_vendor_notes(vendor_notes=vendor_notes, vendor_id=vendor_id, add_notes=add_notes, ln=ln) return page(title=_("Vendor notes"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_with_no_recid_step1(req, borrower_id, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_register_ill_request_with_no_recid_step1( infos=infos, borrower_id=borrower_id, admin=True, ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, metaheaderadd = "" % CFG_SITE_SECURE_URL, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_with_no_recid_step2(req, title, authors, place, publisher, year, edition, isbn, budget_code, period_of_interest_from, period_of_interest_to, additional_comments, only_edition, key, string, borrower_id, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] book_info = (title, authors, place, publisher, year, edition, isbn) request_details = (budget_code, period_of_interest_from, period_of_interest_to, additional_comments, only_edition) if borrower_id in (None, '', 'None'): body = None if not key: borrowers_list = None elif not string: infos.append(_('Empty string.') + ' ' + _('Please, try again.')) borrowers_list = None else: if validate_date_format(period_of_interest_from) is False: infos = [] infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_request_with_no_recid_step1( infos=infos, borrower_id=None, admin=True, ln=ln) elif validate_date_format(period_of_interest_to) is False: infos = [] infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_request_with_no_recid_step1( infos=infos, ln=ln) else: result = search_user(key, string) borrowers_list = [] if len(result) == 0: infos.append(_("0 borrowers found.")) else: for user in result: borrower_data = db.get_borrower_data_by_id(user[0]) borrowers_list.append(borrower_data) if body == None: body = bc_templates.tmpl_register_ill_request_with_no_recid_step2( book_info=book_info, request_details=request_details, result=borrowers_list, key=key, string=string, infos=infos, ln=ln) else: user_info = db.get_borrower_data_by_id(borrower_id) return register_ill_request_with_no_recid_step3(req, title, authors, place, publisher,year, edition, isbn, user_info, budget_code, period_of_interest_from, 
period_of_interest_to, additional_comments, only_edition, ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_with_no_recid_step3(req, title, authors, place, publisher, year, edition, isbn, user_info, budget_code, period_of_interest_from, period_of_interest_to, additional_comments, only_edition, ln=CFG_SITE_LANG): navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) request_details = (budget_code, period_of_interest_from, period_of_interest_to, additional_comments, only_edition) book_info = (title, authors, place, publisher, year, edition, isbn) if user_info is None: return register_ill_request_with_no_recid_step2(req, title, authors, place, publisher, year, edition, isbn, budget_code, period_of_interest_from, period_of_interest_to, additional_comments, only_edition, 'name', None, None, ln) else: body = bc_templates.tmpl_register_ill_request_with_no_recid_step3( book_info=book_info, user_info=user_info, request_details=request_details, admin=True, ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_with_no_recid_step4(req, book_info, borrower_id, request_details, ln): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) _ = gettext_set_language(ln) (title, authors, place, publisher, year, edition, isbn) = book_info #create_ill_record(book_info)) (budget_code, period_of_interest_from, period_of_interest_to, library_notes, only_edition) = request_details ill_request_notes = {} if library_notes: ill_request_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = \ str(library_notes) ### budget_code ### if db.get_borrower_data_by_id(borrower_id) == None: _ = gettext_set_language(ln) infos = [] infos.append(_("Request not registered: wrong borrower id")) body = bc_templates.tmpl_register_ill_request_with_no_recid_step2( book_info=book_info, request_details=request_details, result=[], key='name', string=None, infos=infos, ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) else: book_info = {'title': title, 'authors': authors, 'place': place, 'publisher': publisher,'year' : year, 'edition': edition, 'isbn' : isbn} db.ill_register_request_on_desk(borrower_id, book_info, period_of_interest_from, period_of_interest_to, CFG_BIBCIRCULATION_ILL_STATUS_NEW, str(ill_request_notes), only_edition, 'book', budget_code) return list_ill_request(req, CFG_BIBCIRCULATION_ILL_STATUS_NEW, ln) def get_borrower_ill_details(req, borrower_id, ln=CFG_SITE_LANG): """ Display ILL details of a borrower. @type borrower_id: integer. @param borrower_id: identify the borrower. It is also the primary key of the table crcBORROWER. @type ill_id: integer. @param ill_id: identify the ILL request. 
It is also the primary key of the table crcILLREQUEST. @return: borrower ILL details. """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) result = db.get_ill_requests_details(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) name = db.get_borrower_name(borrower_id) title = _("ILL details") + "- %s" % (name) body = bc_templates.tmpl_borrower_ill_details(result=result, borrower_id=borrower_id, ln=ln) return page(title=title, uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def bor_ill_historical_overview(req, borrower_id, ln): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) result = db.bor_ill_historical_overview(borrower_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) name = db.get_borrower_name(borrower_id) title = _("ILL historical overview") + " - %s" % (name) body = bc_templates.tmpl_borrower_ill_details(result=result, borrower_id=borrower_id, ln=ln) return page(title=title, uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_ill_library_notes(req, ill_id, delete_key, library_notes, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if delete_key and ill_id: if looks_like_dictionary(db.get_ill_notes(ill_id)): ill_notes = eval(db.get_ill_notes(ill_id)) del ill_notes[delete_key] db.update_ill_notes(ill_id, ill_notes) elif library_notes: if db.get_ill_notes(ill_id): if looks_like_dictionary(db.get_ill_notes(ill_id)): ill_notes = eval(db.get_ill_notes(ill_id)) else: ill_notes = {} else: ill_notes = {} ill_notes[time.strftime("%Y-%m-%d %H:%M:%S")] = str(library_notes) db.update_ill_notes(ill_id, ill_notes) ill_notes = db.get_ill_notes(ill_id) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) body = bc_templates.tmpl_ill_notes(ill_notes=ill_notes, ill_id=ill_id, ln=ln) return page(title=_("ILL notes"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def get_expired_loans_with_requests(req, request_id, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if request_id: db.update_loan_request_status(request_id, CFG_BIBCIRCULATION_REQUEST_STATUS_CANCELLED) #update_request_data(request_id) result = db.get_expired_loans_with_requests() else: result = db.get_expired_loans_with_requests() navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) body = bc_templates.tmpl_get_expired_loans_with_requests(result=result, ln=ln) return page(title=_("Overdue loans with holds"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_book_request(req, borrower_id, ln=CFG_SITE_LANG): """ Display a form where is possible to searh for an item. 
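    The submitted form is handled by register_ill_book_request_result(),
    which either resolves a barcode through db.get_recid() or searches
    the 'Books' collection via perform_request_search().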
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) _ = gettext_set_language(ln) infos = [] body = bc_templates.tmpl_register_ill_book_request(infos=infos, borrower_id=borrower_id, ln=ln) return page(title=_("Register ILL Book request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_book_request_result(req, borrower_id, p, f, ln=CFG_SITE_LANG): """ Search an item and return a list with all the possible results. To retrieve the information desired, we use the method 'perform_request_search' (from search_engine.py). In the case of BibCirculation, we are just looking for books (items) inside the collection 'Books'. @type p: string @param p: search pattern @type f: string @param f: search field @return: list of recids """ id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if p == '': infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_register_ill_book_request(infos=infos, borrower_id=borrower_id, ln=ln) else: if f == 'barcode': p = p.strip('\'" \t') recid = db.get_recid(p) if recid is None: infos.append(_('The barcode %(x_strong_tag_open)s%(x_barcode)s%(x_strong_tag_close)s does not exist on BibCirculation database.') % {'x_barcode': p, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_book_request(infos=infos, borrower_id=borrower_id, ln=ln) else: body = bc_templates.tmpl_register_ill_book_request_result( result=[recid], borrower_id=borrower_id, ln=ln) else: result = perform_request_search(cc="Books", sc="1", p=p, f=f) if len(result) == 0: return register_ill_request_with_no_recid_step1(req, borrower_id, ln) else: body = bc_templates.tmpl_register_ill_book_request_result( result=result, borrower_id=borrower_id, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return page(title=_("Register ILL Book request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_book_request_from_borrower_page(req, borrower_id, ln=CFG_SITE_LANG): """ Display a form where is possible to searh for an item. 
""" id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) _ = gettext_set_language(ln) infos = [] body = bc_templates.tmpl_register_ill_book_request_from_borrower_page( infos=infos, borrower_id=borrower_id, ln=ln) return page(title=_("Register ILL Book request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_from_borrower_page_step1(req, borrower_id, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) infos = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) _ = gettext_set_language(ln) body = bc_templates.tmpl_register_ill_request_from_borrower_page_step1( infos=infos, borrower_id=borrower_id, ln=ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_request_from_borrower_page_step2(req, borrower_id, title, authors, place, publisher, year, edition, isbn, period_of_interest_from, period_of_interest_to, additional_comments, only_edition, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if validate_date_format(period_of_interest_from) is False: infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_request_from_borrower_page_step1( infos=infos, borrower_id=borrower_id, ln=ln) elif validate_date_format(period_of_interest_to) is False: infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_request_from_borrower_page_step1( infos=infos, borrower_id=borrower_id, ln=ln) else: book_info = (title, authors, place, publisher, year, edition, isbn) user_info = db.get_borrower_details(borrower_id) request_details = (period_of_interest_from, period_of_interest_to, additional_comments, only_edition) body = bc_templates.tmpl_register_ill_request_with_no_recid_step3( book_info=book_info, user_info=user_info, request_details=request_details, admin=True, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_purchase_request_step1(req, recid, request_type, title, authors, place, publisher, year, edition, this_edition_only, isbn, standard_number, budget_code, cash, period_of_interest_from, period_of_interest_to, additional_comments, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % 
(CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) if recid != '': (title, year, authors, isbn, publisher) = book_information_from_MARC(int(recid)) body = bc_templates.tmpl_register_purchase_request_step1(infos=infos, fields=(request_type, title, authors, place, publisher, year, edition, this_edition_only, isbn, standard_number, budget_code, cash, period_of_interest_from, period_of_interest_to, additional_comments), ln=ln) return page(title=_("Register purchase request"), uid=id_user, req=req, body=body, language=ln, metaheaderadd='' % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_purchase_request_step2(req, request_type, title, authors, place, publisher, year, edition, this_edition_only, isbn, standard_number, budget_code, cash, period_of_interest_from, period_of_interest_to, additional_comments, p, f, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) infos = [] if cash and budget_code == '': budget_code = 'cash' fields = (request_type, title, authors, place, publisher, year, edition, this_edition_only, isbn, standard_number, budget_code, cash, period_of_interest_from, period_of_interest_to, additional_comments) if budget_code == '' and not cash: infos.append(_("Payment method information is mandatory. Please, type your budget code or tick the 'cash' checkbox.")) body = bc_templates.tmpl_register_purchase_request_step1(infos=infos, fields=fields, ln=ln) else: ######################## ######################## if p and not f: infos.append(_('Empty string.') + ' ' + _('Please, try again.')) body = bc_templates.tmpl_register_purchase_request_step2( infos=infos, fields=fields, result=None, p=p, f=f, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) result = search_user(f, p) borrowers_list = [] if len(result) == 0 and f: if CFG_CERN_SITE: infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID.")) else: new_borrower_link = create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/add_new_borrower_step1', {'ln': ln}, _("Register new borrower.")) message = _("0 borrowers found.") + ' ' + new_borrower_link infos.append(message) else: for user in result: borrower_data = db.get_borrower_data_by_id(user[0]) borrowers_list.append(borrower_data) body = bc_templates.tmpl_register_purchase_request_step2( infos=infos, fields=fields, result=borrowers_list, p=p, f=f, ln=ln) ######################## ######################## return page(title=_("Register purchase request"), uid=id_user, req=req, body=body, language=ln, metaheaderadd='' % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_purchase_request_step3(req, request_type, title, authors, place, publisher, year, edition, this_edition_only, isbn, standard_number, budget_code, cash, period_of_interest_from, period_of_interest_to, additional_comments, borrower_id, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) navtrail_previous_links = 'Admin 
Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) infos = [] if budget_code == '' and not cash: infos.append(_("Payment method information is mandatory. Please, type your budget code or tick the 'cash' checkbox.")) body = bc_templates.tmpl_register_purchase_request_step1(infos=infos, fields=(request_type, title, authors, place, publisher, year, edition, this_edition_only, isbn, standard_number, budget_code, cash, period_of_interest_from, period_of_interest_to, additional_comments), ln=ln) else: if borrower_id == '': borrower_id = db.get_borrower_id_by_email(db.get_invenio_user_email(id_user)) item_info = {'title': title, 'authors': authors, 'place': place, 'publisher': publisher, 'year' : year, 'edition': edition, 'isbn' : isbn, 'standard_number': standard_number} ill_request_notes = {} if additional_comments: ill_request_notes[time.strftime("%Y-%m-%d %H:%M:%S")] \ = str(additional_comments) if cash and budget_code == '': budget_code = 'cash' db.ill_register_request_on_desk(borrower_id, item_info, period_of_interest_from, period_of_interest_to, CFG_BIBCIRCULATION_ACQ_STATUS_NEW, str(ill_request_notes), this_edition_only, request_type, budget_code) return redirect_to_url(req, '%s/admin2/bibcirculation/list_acquisition?ln=%s&status=%s' % \ (CFG_SITE_SECURE_URL, ln, CFG_BIBCIRCULATION_ACQ_STATUS_NEW)) return page(title=_("Register purchase request"), uid=id_user, req=req, body=body, language=ln, metaheaderadd='' % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_article_request_step1(req, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] navtrail_previous_links = 'Admin Area' \ ' > ' \ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos, ln=ln) return page(title=_("Register ILL Article request"), uid=id_user, req=req, body=body, metaheaderadd = ""%(CFG_SITE_SECURE_URL), navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_article_request_step2(req, periodical_title, article_title, author, report_number, volume, issue, pages, year, budget_code, issn, period_of_interest_from, period_of_interest_to, additional_comments, key, string, ln=CFG_SITE_LANG): id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] if key and not string: infos.append(_('Empty string.') + ' ' + _('Please, try again.')) article_info = (periodical_title, article_title, author, report_number, volume, issue, pages, year, issn) request_details = (period_of_interest_from, period_of_interest_to, budget_code, additional_comments) body = bc_templates.tmpl_register_ill_article_request_step2( article_info=article_info, request_details=request_details, result=None, key=key, string=string, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) result = search_user(key, string) borrowers_list = [] if len(result) == 0 and key: if CFG_CERN_SITE: infos.append(_("0 borrowers found.") + ' ' +_("Search by CCID.")) else: new_borrower_link 
= create_html_link(CFG_SITE_SECURE_URL + '/admin2/bibcirculation/add_new_borrower_step1', {'ln': ln}, _("Register new borrower.")) message = _("0 borrowers found.") + ' ' + new_borrower_link infos.append(message) else: for user in result: borrower_data = db.get_borrower_data_by_id(user[0]) borrowers_list.append(borrower_data) if validate_date_format(period_of_interest_from) is False: infos = [] infos.append(_("The period of interest %(x_strong_tag_open)sFrom: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_from, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos, ln=ln) elif validate_date_format(period_of_interest_to) is False: infos = [] infos.append(_("The period of interest %(x_strong_tag_open)sTo: %(x_date)s%(x_strong_tag_close)s is not a valid date or date format") % {'x_date': period_of_interest_to, 'x_strong_tag_open': '', 'x_strong_tag_close': ''}) body = bc_templates.tmpl_register_ill_article_request_step1(infos=infos, ln=ln) else: article_info = (periodical_title, article_title, author, report_number, volume, issue, pages, year, issn) request_details = (period_of_interest_from, period_of_interest_to, budget_code, additional_comments) body = bc_templates.tmpl_register_ill_article_request_step2( article_info=article_info, request_details=request_details, result=borrowers_list, key=key, string=string, infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) return invenio.webpage.page(title=_("Register ILL request"), uid=id_user, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def register_ill_article_request_step3(req, periodical_title, title, authors, report_number, volume, issue, page_number, year, issn, user_info, request_details, ln=CFG_SITE_LANG): #id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) #info = (title, authors, "", "", year, "", issn) #create_ill_record(info) item_info = {'periodical_title': periodical_title, 'title': title, 'authors': authors, 'place': "", 'publisher': "", 'year' : year, 'edition': "", 'issn' : issn, 'volume': volume, 'issue': issue, 'page': page_number } (period_of_interest_from, period_of_interest_to, budget_code, library_notes) = request_details only_edition = "" if user_info is None: return register_ill_article_request_step2(req, periodical_title, title, authors, report_number, volume, issue, page_number, year, budget_code, issn, period_of_interest_from, period_of_interest_to, library_notes, 'name', None, ln) else: borrower_id = user_info[0] ill_request_notes = {} if library_notes: ill_request_notes[time.strftime("%Y-%m-%d %H:%M:%S")] \ = str(library_notes) db.ill_register_request_on_desk(borrower_id, item_info, period_of_interest_from, period_of_interest_to, CFG_BIBCIRCULATION_ILL_STATUS_NEW, str(ill_request_notes), only_edition, 'article', budget_code) return list_ill_request(req, CFG_BIBCIRCULATION_ILL_STATUS_NEW, ln) def ill_search(req, ln=CFG_SITE_LANG): infos = [] navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) body = bc_templates.tmpl_ill_search(infos=infos, ln=ln) 
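    # ill_search_result() below dispatches on f in ('title',
    # 'ILL_request_ID', 'cost', 'notes'); any other value would leave
    # `body` unbound and crash the final page() call.  A defensive
    # trailing `else` is a reasonable hardening (a sketch, not present
    # in the original code):
    #
    #     else:
    #         body = bc_templates.tmpl_list_ill_request(ill_req=[], ln=ln)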
return page(title=_("ILL search"), uid=id_user, req=req, body=body, language=ln, metaheaderadd='' % CFG_SITE_SECURE_URL, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def ill_search_result(req, p, f, date_from, date_to, ln): """ Search an item and return a list with all the possible results. To retrieve the information desired, we use the method 'perform_request_search' (from search_engine.py). In the case of BibCirculation, we are just looking for books (items) inside the collection 'Books'. @type p: string @param p: search pattern @type f: string @param f: search field @return: list of recids """ navtrail_previous_links = 'Admin Area' \ ' > '\ 'Circulation Management' \ ' ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, ln) #id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) if not has_date_format(date_from): date_from = '0000-00-00' if not has_date_format(date_to): date_to = '9999-12-31' if f == 'title': ill_req = db.search_ill_requests_title(p, date_from, date_to) body = bc_templates.tmpl_list_ill_request(ill_req=ill_req, ln=ln) elif f == 'ILL_request_ID': ill_req = db.search_ill_requests_id(p, date_from, date_to) body = bc_templates.tmpl_list_ill_request(ill_req=ill_req, ln=ln) elif f == 'cost': ill_req = db.search_acq_requests_cost(p, date_from, date_to) body = bc_templates.tmpl_list_acquisition(ill_req=ill_req, ln=ln) elif f == 'notes': ill_req = db.search_acq_requests_notes(p, date_from, date_to) body = bc_templates.tmpl_list_acquisition(ill_req=ill_req, ln=ln) #body = bc_templates.tmpl_list_ill_request(ill_req=ill_req, ln=ln) return page(title=_("List of ILL requests"), req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def delete_copy_step1(req, barcode, ln): #id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) barcode = barcode.strip('\'" \t') recid = db.get_recid(barcode) if recid: #recid = recid[0] infos.append(_("Do you really want to delete this copy of the book?")) copies = db.get_item_copies_details(recid) title = _("Delete copy") body = bc_templates.tmpl_delete_copy_step1(barcode_to_delete=barcode, recid=recid, result=copies, infos=infos, ln=ln) else: message = _("""The barcode %s was not found""") % (barcode) infos.append(message) title = _("Item search") body = bc_templates.tmpl_item_search(infos=infos, ln=ln) return page(title=title, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def delete_copy_step2(req, barcode, ln): #id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) _ = gettext_set_language(ln) infos = [] barcode = barcode.strip('\'" \t') recid = db.get_recid(barcode) if recid: #recid = recid[0] if db.delete_copy(barcode)==1: message = _("The copy with barcode %s has been deleted.") % (barcode) else: message = _('It was NOT possible to delete the copy with barcode %s') % (barcode) infos.append(message) copies = db.get_item_copies_details(recid) requests = db.get_item_requests(recid) loans = db.get_item_loans(recid) req_hist_overview = db.get_item_requests_historical_overview(recid) loans_hist_overview = db.get_item_loans_historical_overview(recid) title = _("Item details") body = 
bc_templates.tmpl_get_item_details( recid=recid, copies=copies, requests=requests, loans=loans, req_hist_overview=req_hist_overview, loans_hist_overview=loans_hist_overview, infos=infos, ln=ln) else: message = _("The barcode %s was not found") % (barcode) infos.append(message) title = _("Item search") body = bc_templates.tmpl_item_search(infos=infos, ln=ln) navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) return page(title=title, req=req, body=body, language=ln, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) diff --git a/modules/bibclassify/lib/bibclassify_engine.py b/modules/bibclassify/lib/bibclassify_engine.py index 34b980daf..295f7fb3e 100644 --- a/modules/bibclassify/lib/bibclassify_engine.py +++ b/modules/bibclassify/lib/bibclassify_engine.py @@ -1,696 +1,699 @@ ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibClassify engine. This module is the main module of BibClassify. its two main methods are output_keywords_for_sources and get_keywords_from_text. The first one output keywords for a list of sources (local files or URLs, PDF or text) while the second one outputs the keywords for text lines (which are obtained using the module bibclassify_text_normalizer). This module also takes care of the different outputs (text, MARCXML or HTML). But unfortunately there is a confusion between running in a standalone mode and producing output suitable for printing, and running in a web-based mode where the webtemplate is used. For the moment the pieces of the representation code are left in this module. 
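A minimal call sketch (the taxonomy name 'HEP' is illustrative):

    from bibclassify_engine import get_keywords_from_text
    marcxml = get_keywords_from_text(text_lines, 'HEP',
                                     output_mode='marcxml')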
This module is STANDALONE safe """ try: import psyco psyco.full() except: pass import os import random import sys import time import bibclassify_config as bconfig log = bconfig.get_logger("bibclassify.engine") import bibclassify_ontology_reader as reader import bibclassify_text_extractor as extractor import bibclassify_text_normalizer as normalizer import bibclassify_keyword_analyzer as keyworder import bibclassify_acronym_analyzer as acronymer - - - +try: + from invenio.urlutils import make_user_agent_string +except ImportError: + ## Not in Invenio, we simply use default agent + def make_user_agent_string(component=None): + return bconfig.CFG_BIBCLASSIFY_USER_AGENT # --------------------------------------------------------------------- # API # --------------------------------------------------------------------- def output_keywords_for_sources(input_sources, taxonomy_name, output_mode="text", output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False, match_mode="full", no_cache=False, with_author_keywords=False, rebuild_cache=False, only_core_tags=False, extract_acronyms=False, **kwargs): """Outputs the keywords for each source in sources.""" # Inner function which does the job and it would be too much work to # refactor the call (and it must be outside the loop, before it did # not process multiple files) def process_lines(): if output_mode == "text": print "Input file: %s" % source output = get_keywords_from_text(text_lines, taxonomy_name, output_mode=output_mode, output_limit=output_limit, spires=spires, match_mode=match_mode, no_cache=no_cache, with_author_keywords=with_author_keywords, rebuild_cache=rebuild_cache, only_core_tags=only_core_tags, extract_acronyms=extract_acronyms ) print output # Get the fulltext for each source. for entry in input_sources: log.info("Trying to read input file %s." % entry) text_lines = None source = "" if os.path.isdir(entry): for filename in os.listdir(entry): filename = os.path.join(entry, filename) if os.path.isfile(filename): text_lines = extractor.text_lines_from_local_file(filename) if text_lines: source = filename process_lines() elif os.path.isfile(entry): text_lines = extractor.text_lines_from_local_file(entry) if text_lines: source = os.path.basename(entry) process_lines() else: # Treat as a URL. text_lines = extractor.text_lines_from_url(entry, - user_agent=bconfig.CFG_BIBCLASSIFY_USER_AGENT) + user_agent=make_user_agent_string("BibClassify")) if text_lines: source = entry.split("/")[-1] process_lines() def get_keywords_from_local_file(local_file, taxonomy_name, output_mode="text", output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False, match_mode="full", no_cache=False, with_author_keywords=False, rebuild_cache=False, only_core_tags=False, extract_acronyms=False, **kwargs ): """Outputs keywords reading a local file. Arguments and output are the same as for @see: get_keywords_from_text() """ log.info("Analyzing keywords for local file %s." 
% local_file) text_lines = extractor.text_lines_from_local_file(local_file) return get_keywords_from_text(text_lines, taxonomy_name, output_mode=output_mode, output_limit=output_limit, spires=spires, match_mode=match_mode, no_cache=no_cache, with_author_keywords=with_author_keywords, rebuild_cache=rebuild_cache, only_core_tags=only_core_tags, extract_acronyms=extract_acronyms) def get_keywords_from_text(text_lines, taxonomy_name, output_mode="text", output_limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER, spires=False, match_mode="full", no_cache=False, with_author_keywords=False, rebuild_cache=False, only_core_tags=False, extract_acronyms=False, **kwargs): """Extracts keywords from the list of strings @var text_lines: list of strings (will be normalized before being joined into one string) @keyword taxonomy_name: string, name of the taxonomy_name @keyword output_mode: string - text|html|marcxml|raw @keyword output_limit: int @keyword spires: boolean, if True marcxml output reflect spires codes @keyword match_mode: str - partial|full; in partial mode only beginning of the fulltext is searched @keyword no_cache: boolean, means loaded definitions will not be saved @keyword with_author_keywords: boolean, extract keywords from the pdfs @keyword rebuild_cache: boolean @keyword only_core_tags: boolean @return: if output_mode=raw, it will return (single_keywords, composite_keywords, author_keywords, acronyms) for other output modes it returns formatted string """ start_time = time.time() cache = reader.get_cache(taxonomy_name) if not cache: reader.set_cache(taxonomy_name, reader.get_regular_expressions(taxonomy_name, rebuild=rebuild_cache, no_cache=no_cache)) cache = reader.get_cache(taxonomy_name) _skw = cache[0] _ckw = cache[1] text_lines = normalizer.cut_references(text_lines) fulltext = normalizer.normalize_fulltext("\n".join(text_lines)) if match_mode == "partial": fulltext = _get_partial_text(fulltext) author_keywords = None if with_author_keywords: author_keywords = extract_author_keywords(_skw, _ckw, fulltext) acronyms = {} if extract_acronyms: acronyms = extract_abbreviations(fulltext) single_keywords = extract_single_keywords(_skw, fulltext) composite_keywords = extract_composite_keywords(_ckw, fulltext, single_keywords) if only_core_tags: single_keywords = clean_before_output(_filter_core_keywors(single_keywords)) composite_keywords = _filter_core_keywors(composite_keywords) else: # Filter out the "nonstandalone" keywords single_keywords = clean_before_output(single_keywords) log.info('Keywords generated in: %.1f sec' % (time.time() - start_time)) if output_mode == "raw": if output_limit: return (_kw(_sort_kw_matches(single_keywords, output_limit)), _kw(_sort_kw_matches(composite_keywords, output_limit)), author_keywords, # this we don't limit (?) _kw(_sort_kw_matches(acronyms, output_limit))) else: return (single_keywords, composite_keywords, author_keywords, acronyms) else: return get_keywords_output(single_keywords, composite_keywords, taxonomy_name, author_keywords, acronyms, output_mode, output_limit, spires, only_core_tags) def extract_single_keywords(skw_db, fulltext): """Find single keywords in the fulltext @var skw_db: list of KeywordToken objects @var fulltext: string, which will be searched @return : dictionary of matches in a format { , [[position, position...], ], .. 
} or empty {} """ return keyworder.get_single_keywords(skw_db, fulltext) or {} def extract_composite_keywords(ckw_db, fulltext, skw_spans): """Returns a list of composite keywords bound with the number of occurrences found in the text string. @var ckw_db: list of KewordToken objects (they are supposed to be composite ones) @var fulltext: string to search in @skw_spans: dictionary of already identified single keywords @return : dictionary of matches in a format { , [[position, position...], [info_about_matches] ], .. } or empty {} """ return keyworder.get_composite_keywords(ckw_db, fulltext, skw_spans) or {} def extract_abbreviations(fulltext): """Extract acronyms from the fulltext @var fulltext: utf-8 string @return: dictionary of matches in a formt { , [matched skw or ckw object, ....] } or empty {} """ acronyms = {} K = reader.KeywordToken for k, v in acronymer.get_acronyms(fulltext).items(): acronyms[K(k, type='acronym')] = v return acronyms def extract_author_keywords(skw_db, ckw_db, fulltext): """Finds out human defined keyowrds in a text string. Searches for the string "Keywords:" and its declinations and matches the following words. @var skw_db: list single kw object @var ckw_db: list of composite kw objects @var fulltext: utf-8 string @return: dictionary of matches in a formt { , [matched skw or ckw object, ....] } or empty {} """ akw = {} K = reader.KeywordToken for k, v in keyworder.get_author_keywords(skw_db, ckw_db, fulltext).items(): akw[K(k, type='author-kw')] = v return akw # --------------------------------------------------------------------- # presentation functions # --------------------------------------------------------------------- def get_keywords_output(single_keywords, composite_keywords, taxonomy_name, author_keywords=None, acronyms=None, style="text", output_limit=0, spires=False, only_core_tags=False): """Returns a formatted string representing the keywords according to the chosen style. 
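    Only the styles 'text', 'marcxml' and 'html' are handled below; any
    other value falls through the if/elif chain and yields None, so the
    caller should validate the style beforehand.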
This is the main routing call, this function will also strip unwanted keywords before output and limits the number of returned keywords @var single_keywords: list of single keywords @var composite_keywords: list of composite keywords @var taxonomy_name: string, taxonomy name @keyword author_keywords: dictionary of author keywords extracted from fulltext @keyword acronyms: dictionary of extracted acronyms @keyword style: text|html|marc @keyword output_limit: int, number of maximum keywords printed (it applies to single and composite keywords separately) @keyword spires: boolen meaning spires output style @keyword only_core_tags: boolean """ # sort the keywords, but don't limit them (that will be done later) single_keywords = _sort_kw_matches(single_keywords) composite_keywords = _sort_kw_matches(composite_keywords) if style == "text": return _output_text(single_keywords, composite_keywords, author_keywords, acronyms, spires, only_core_tags, limit=output_limit) elif style == "marcxml": return _output_marc(single_keywords, composite_keywords, author_keywords, acronyms) elif style == "html": return _output_html(single_keywords, composite_keywords, author_keywords, acronyms, spires, taxonomy_name, limit=output_limit) def build_marc(recid, single_keywords, composite_keywords, spires=False, author_keywords=None, acronyms=None): """Creates xml record @recid: ingeter @var single_keywords: dictionary of kws @var composite_keywords: dictionary of kws @keyword spires: please don't use, left for historical reasons @keyword author_keywords: dictionary of extracted keywords @keyword acronyms: dictionary of extracted acronyms @return: str, marxml """ output = ['\n' '%s' % recid] # no need to sort single_keywords = single_keywords.items() composite_keywords = composite_keywords.items() output.append(_output_marc(single_keywords, composite_keywords, author_keywords, acronyms)) output.append('') return '\n'.join(output) def _output_marc(skw_matches, ckw_matches, author_keywords, acronyms, spires=False, kw_field=bconfig.CFG_MAIN_FIELD, auth_field=bconfig.CFG_AUTH_FIELD, acro_field=bconfig.CFG_ACRON_FIELD, provenience='BibClassify'): """Outputs the keywords in the MARCXML format. 
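    Each keyword is emitted as one datafield whose tag and indicators are
    derived by _parse_marc_code() from a field code string; e.g. a code
    such as '6531_' (illustrative) yields tag '653', first indicator '1'
    and an empty second indicator.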
@var skw_matches: list of single keywords @var ckw_matches: list of composite keywords @var author_keywords: dictionary of extracted author keywords @var acronyms: dictionary of acronyms @var spires: boolean, True=generate spires output - BUT NOTE: it is here only not to break compatibility, in fact spires output should never be used for xml because if we read marc back into the KeywordToken objects, we would not find them @keyword provenience: string that identifies source (authority) that assigned the contents of the field @return: string, formatted MARC""" kw_template = ('\n' ' %s\n' ' %s\n' ' %s\n' ' %s\n' '\n') output = [] tag, ind1, ind2 = _parse_marc_code(kw_field) for keywords in (skw_matches, ckw_matches): if keywords and len(keywords): for kw, info in keywords: output.append(kw_template % (tag, ind1, ind2, provenience, kw.output(spires), len(info[0]), kw.getType())) for field, keywords in ((auth_field, author_keywords), (acro_field, acronyms)): if keywords and len(keywords) and field: # field='' we shall not save the keywords tag, ind1, ind2 = _parse_marc_code(field) for kw, info in keywords.items(): output.append(kw_template % (tag, ind1, ind2, provenience, kw, '', kw.getType())) return "".join(output) def _output_text(skw_matches=None, ckw_matches=None, author_keywords=None, acronyms=None, spires=False, only_core_tags=False, limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER): """Outputs the results obtained in text format. @var skw_matches: sorted list of single keywords @var ckw_matches: sorted list of composite keywords @var author_keywords: dictionary of author keywords @var acronyms: dictionary of acronyms @var spires: boolean @var only_core_tags: boolean @keyword limit: int, number of printed keywords @return: str, html formatted output """ output = [] if limit: resized_skw = skw_matches[0:limit] resized_ckw = ckw_matches[0:limit] else: resized_skw = skw_matches resized_ckw = ckw_matches if only_core_tags: output.append('\nCore keywords:\n' + '\n'.join(_get_core_keywords(skw_matches, ckw_matches, spires=spires) or ['--'])) else: output.append('\nAuthor keywords:\n' + '\n'.join(_get_author_keywords(author_keywords, spires=spires) or ['--'])) output.append('\nComposite keywords:\n' + '\n'.join(_get_compositekws(resized_ckw, spires=spires) or ['--'])) output.append('\nSingle keywords:\n' + '\n'.join(_get_singlekws(resized_skw, spires=spires) or ['--'])) output.append('\nCore keywords:\n' + '\n'.join(_get_core_keywords(skw_matches, ckw_matches, spires=spires) or ['--'])) output.append('\nField codes:\n' + '\n'.join(_get_fieldcodes(resized_skw, resized_ckw, spires=spires) or ['--'])) output.append('\nAcronyms:\n' + '\n'.join(_get_acronyms(acronyms) or ['--'])) output.append('\n--\n%s' % _signature()) return "\n".join(output) + "\n" def _output_html(skw_matches=None, ckw_matches=None, author_keywords=None, acronyms=None, spires=False, only_core_tags=False, limit=bconfig.CFG_BIBCLASSIFY_DEFAULT_OUTPUT_NUMBER): """Output the same as txt output does, but HTML formatted @var skw_matches: sorted list of single keywords @var ckw_matches: sorted list of composite keywords @var author_keywords: dictionary of extracted author keywords @var acronyms: dictionary of acronyms @var spires: boolean @var only_core_tags: boolean @keyword limit: int, number of printed keywords @return: str, html formatted output """ output = _output_text(skw_matches, ckw_matches, author_keywords, acronyms, spires, only_core_tags, limit) output = output.replace('\n', '
') return """ Automatically generated keywords by bibclassify %s """ % output def _get_singlekws(skw_matches, spires=False): """ @var skw_matches: dict of {keyword: [info,...]} @keyword spires: bool, to get the spires output @return: list of formatted keywords """ output = [] for single_keyword, info in skw_matches: output.append("%d %s" % (len(info[0]), single_keyword.output(spires))) return output def _get_compositekws(ckw_matches, spires=False): """ @var ckw_matches: dict of {keyword: [info,...]} @keyword spires: bool, to get the spires output @return: list of formatted keywords """ output = [] for composite_keyword, info in ckw_matches: output.append("%d %s %s" % (len(info[0]), composite_keyword.output(spires), info[1])) return output def _get_acronyms(acronyms): """Returns a formatted list of acronyms.""" acronyms_str = [] if acronyms: for acronym, expansions in acronyms.iteritems(): expansions_str = ", ".join(["%s (%d)" % expansion for expansion in expansions]) acronyms_str.append("%s %s" % (acronym, expansions_str)) return sorted(acronyms_str) def _get_author_keywords(author_keywords, spires=False): """Formats the output for the author keywords. @return: list of formatted author keywors """ out = [] if author_keywords: for keyword, matches in author_keywords.items(): skw_matches = matches[0] #dictionary of single keywords ckw_matches = matches[1] #dict of composite keywords matches_str = [] for ckw, spans in ckw_matches.items(): matches_str.append('"%s"' % ckw.output(spires)) for skw, spans in skw_matches.items(): matches_str.append('"%s"' % skw.output(spires)) if matches_str: out.append('"%s" matches %s' % (keyword, ", ".join(matches_str))) else: out.append('"%s" matches no keyword.' % keyword) return sorted(out) def _get_fieldcodes(skw_matches, ckw_matches, spires=False): """Returns the output for the field codes. @var skw_matches: dict of {keyword: [info,...]} @var ckw_matches: dict of {keyword: [info,...]} @keyword spires: bool, to get the spires output @return: string""" fieldcodes = {} output = [] for skw, _ in skw_matches: for fieldcode in skw.fieldcodes: fieldcodes.setdefault(fieldcode, set()).add(skw.output(spires)) for ckw, _ in ckw_matches: if len(ckw.fieldcodes): for fieldcode in ckw.fieldcodes: fieldcodes.setdefault(fieldcode, set()).add(ckw.output(spires)) else: #inherit field-codes from the composites for kw in ckw.getComponents(): for fieldcode in kw.fieldcodes: fieldcodes.setdefault(fieldcode, set()).add('%s*' % ckw.output(spires)) fieldcodes.setdefault('*', set()).add(kw.output(spires)) for fieldcode, keywords in fieldcodes.items(): output.append('%s: %s' % (fieldcode, ', '.join(keywords))) return sorted(output) def _get_core_keywords(skw_matches, ckw_matches, spires=False): """Returns the output for the field codes. 
@var skw_matches: dict of {keyword: [info,...]} @var ckw_matches: dict of {keyword: [info,...]} @keyword spires: bool, to get the spires output @return: set of formatted core keywords """ output = set() def _get_value_kw(kw): '''Inner function to help to sort the Core keywords''' i = 0 while kw[i].isdigit(): i += 1 if i > 0: return int(kw[:i]) else: return 0 for skw, info in skw_matches: if skw.core: output.add('%d %s' % (len(info[0]), skw.output(spires))) for ckw, info in ckw_matches: if ckw.core: output.add('%d %s' % (len(info[0]), ckw.output(spires))) else: #test if one of the components is not core i = 0 for c in ckw.getComponents(): if c.core: output.add('- %s (%s)' % (c.output(spires), info[1][i])) i += 1 return sorted(output, key=_get_value_kw , reverse=True) def _filter_core_keywors(keywords): matches = {} for kw, info in keywords.items(): if kw.core: matches[kw] = info return matches def _signature(): """Prints out the bibclassify signature @todo: add information about taxonomy, rdflib""" return 'bibclassify v%s' % (bconfig.VERSION,) def clean_before_output(kw_matches): """Returns a clean copy of the keywords data structure - ie. stripped off the standalone and other unwanted elements""" filtered_kw_matches = {} for kw_match, info in kw_matches.iteritems(): if not kw_match.nostandalone: filtered_kw_matches[kw_match] = info return filtered_kw_matches # --------------------------------------------------------------------- # helper functions # --------------------------------------------------------------------- def _skw_matches_comparator(kw0, kw1): """ Compares 2 single keywords objects - first by the number of their spans (ie. how many times they were found), if it is equal it compares them by lenghts of their labels. """ list_comparison = cmp(len(kw1[1][0]), len(kw0[1][0])) if list_comparison: return list_comparison if kw0[0].isComposite() and kw1[0].isComposite(): component_avg0 = sum(kw0[1][1]) / len(kw0[1][1]) component_avg1 = sum(kw1[1][1]) / len(kw1[1][1]) component_comparison = cmp(component_avg1, component_avg0) if component_comparison: return component_comparison return cmp(len(str(kw1[0])), len(str(kw0[0]))) def _kw(keywords): """Turns list of keywords into dictionary""" r = {} for k,v in keywords: r[k] = v return r def _sort_kw_matches(skw_matches, limit=0): """Returns a resized version of data structures of keywords to the given length.""" sorted_keywords = list(skw_matches.items()) sorted_keywords.sort(_skw_matches_comparator) return limit and sorted_keywords[:limit] or sorted_keywords def _get_partial_text(fulltext): """Returns a shortened version of the fulltext used with the partial matching mode. 
The version is composed of 20% in the beginning and 20% in the middle of the text.""" length = len(fulltext) get_index = lambda x: int(float(x) / 100 * length) partial_text = [fulltext[get_index(start):get_index(end)] for start, end in bconfig.CFG_BIBCLASSIFY_PARTIAL_TEXT] return "\n".join(partial_text) def save_keywords(filename, xml): tmp_dir = os.path.dirname(filename) if not os.path.isdir(tmp_dir): os.mkdir(tmp_dir) file_desc = open(filename, "w") file_desc.write(xml) file_desc.close() def get_tmp_file(recid): tmp_directory = "%s/bibclassify" % bconfig.CFG_TMPDIR if not os.path.isdir(tmp_directory): os.mkdir(tmp_directory) filename = "bibclassify_%s.xml" % recid abs_path = os.path.join(tmp_directory, filename) return abs_path def _parse_marc_code(field): """Parses marc field and return default indicators if not filled in""" field = str(field) if len(field) < 4: raise Exception ('Wrong field code: %s' % field) else: field += '__' tag = field[0:3] ind1 = field[3].replace('_', '') ind2 = field[4].replace('_', '') return tag, ind1, ind2 if __name__ == "__main__": log.error("Please use bibclassify_cli from now on.") diff --git a/modules/bibclassify/lib/bibclassify_ontology_reader.py b/modules/bibclassify/lib/bibclassify_ontology_reader.py index 52654a817..d33c4a58f 100644 --- a/modules/bibclassify/lib/bibclassify_ontology_reader.py +++ b/modules/bibclassify/lib/bibclassify_ontology_reader.py @@ -1,1101 +1,1104 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibClassify ontology reader. The ontology reader reads currently either a RDF/SKOS taxonomy or a simple controlled vocabulary file (1 word per line). The first role of this module is to manage the cached version of the ontology file. The second role is to hold all methods responsible for the creation of regular expressions. These methods are grammatically related as we take care of different forms of the same words. The grammatical rules can be configured via the configuration file. The main method from this module is get_regular_expressions. This module is standalone safe. 
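A typical call (a sketch; the taxonomy name 'HEP' is illustrative):

    cache = get_regular_expressions('HEP')
    single_keywords, composite_keywords = cache[0], cache[1]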
""" try: import psyco psyco.full() except: pass from datetime import datetime, timedelta import cPickle import os import re import sys import tempfile import time import urllib2 import traceback import xml.sax import thread import time try: import rdflib rdflib_exceptions_Error = rdflib.exceptions.Error except ImportError: rdflib = None rdflib_exceptions_Error = None import bibclassify_config as bconfig log = bconfig.get_logger("bibclassify.ontology_reader") import config # only if not running in a stanalone mode if bconfig.STANDALONE: dbquery = None + from urllib2 import urlopen else: import dbquery + from urlutils import make_invenio_opener + urlopen = make_invenio_opener('BibClassify').open _contains_digit = re.compile("\d") _starts_with_non = re.compile("(?i)^non[a-z]") _starts_with_anti = re.compile("(?i)^anti[a-z]") _split_by_punctuation = re.compile("(\W+)") _CACHE = {} def get_cache(taxonomy_id): """Returns thread-safe cache for the given taxonomy id, @var taxonomy_id: str, identifier of the taxonomy @return: dictionary object (empty if no taxonomy_id is found), you must not change anything inside it. Create a new dictionary and use set_cache if you want to update the cache! """ # Because of a standalone mode, we don't use the # invenio.data_cacher.DataCacher, but it has no effect # on proper functionality. if _CACHE.has_key(taxonomy_id): ctime, taxonomy = _CACHE[taxonomy_id] # check it is fresh version onto_name, onto_path, onto_url = _get_ontology(taxonomy_id) cache_path = _get_cache_path(onto_name) # if source exists and is newer than the cache hold in memory if os.access(onto_path, os.R_OK) and os.path.getmtime(cache_path) > ctime: log.info('Forcing taxonony rebuild as cached version is newer/updated.') return {} # force cache rebuild # if cache exists and is newer than the cache hold in memory if os.access(cache_path, os.R_OK) and os.path.getmtime(cache_path) > ctime: log.info('Forcing taxonony rebuild as source file is newer/updated.') return {} log.info('Taxonomy retrieved from cache') return taxonomy return {} def set_cache(taxonomy_id, contents): """Updates cache in a thread-safe manner""" lock = thread.allocate_lock() lock.acquire() try: _CACHE[taxonomy_id] = (time.time(), contents) finally: lock.release() def get_regular_expressions(taxonomy_name, rebuild=False, no_cache=False): """Returns a list of patterns compiled from the RDF/SKOS ontology. Uses cache if it exists and if the taxonomy hasn't changed.""" # Translate the ontology name into a local path. Check if the name # relates to an existing ontology. onto_name, onto_path, onto_url = _get_ontology(taxonomy_name) if not onto_path: log.error("Unable to locate the taxonomy: '%s'." % taxonomy_name) raise Exception("Unable to locate the taxonomy: '%s'." % taxonomy_name) cache_path = _get_cache_path(onto_name) log.debug('Taxonomy discovered, now we load it (from cache: %s, onto_path: %s, cache_path: %s)' % (not no_cache, onto_path, cache_path)) if os.access(cache_path, os.R_OK): if os.access(onto_path, os.R_OK): if rebuild or no_cache: log.debug("Cache generation was manually forced.") if os.access(onto_path, os.R_OK): return _build_cache(onto_path, skip_cache=no_cache) else: # ontology file not found. Use the cache instead. log.warning("The ontology couldn't be located. However " "a cached version of it is available. Using it as a " "reference.") return _get_cache(cache_path, source_file=onto_path) if (os.path.getmtime(cache_path) > os.path.getmtime(onto_path)): # Cache is more recent than the ontology: use cache. 
log.debug("Normal situation, cache is older than ontology, so we load it from cache") return _get_cache(cache_path, source_file=onto_path) else: # Ontology is more recent than the cache: rebuild cache. log.warning("Cache '%s' is older than '%s'. We will rebuild the cache" % (cache_path, onto_path)) return _build_cache(onto_path, skip_cache=no_cache) elif os.access(onto_path, os.R_OK): if not no_cache and os.path.exists(cache_path) and not os.access(cache_path, os.W_OK): log.error('We cannot read/write into: %s. Aborting!' % cache_path) raise Exception('We cannot read/write into: %s. Aborting!' % cache_path) elif not no_cache and os.path.exists(cache_path): log.warning('Cache %s exists, but is not readable!' % cache_path) log.info("Cache not available. Building it now: %s" % onto_path) return _build_cache(onto_path, skip_cache=no_cache) else: log.error("We miss both source and cache of the taxonomy: %s" % taxonomy_name) raise Exception("We miss both source and cache of the taxonomy: %s" % taxonomy_name) def _get_remote_ontology(onto_url, time_difference=None): """Checks if the online ontology is more recent than the local ontology. If yes, try to download and store it in Invenio's cache directory. Return a boolean describing the success of the operation. Returns path to the downloaded ontology""" if onto_url is None: return False dl_dir = ((config.CFG_CACHEDIR or tempfile.gettempdir()) + os.sep + "bibclassify" + os.sep) if not os.path.exists(dl_dir): os.mkdir(dl_dir) local_file = dl_dir + os.path.basename(onto_url) remote_modif_time = _get_last_modification_date(onto_url) try: local_modif_seconds = os.path.getmtime(local_file) except OSError: # The local file does not exist. Download the ontology. download = True log.info("The local ontology could not be found.") else: local_modif_time = datetime(*time.gmtime(local_modif_seconds)[0:6]) # Let's set a time delta of 1 hour and 10 minutes. time_difference = time_difference or timedelta(hours=1, minutes=10) download = remote_modif_time > local_modif_time + time_difference if download: log.info("The remote ontology '%s' is more recent " "than the local ontology." % onto_url) if download: if not _download_ontology(onto_url, local_file): log.warning("Error downloading the ontology from: %s" % onto_url) return local_file def _get_ontology(ontology): """Returns the (name, path, url) to the short ontology name. 
@var ontology: name of the ontology, path to the file, or url""" onto_name = onto_path = onto_url = None # first assume we got the path to the file if os.access(ontology, os.R_OK): onto_name = os.path.split(os.path.abspath(ontology))[1] onto_path = os.path.abspath(ontology) onto_url = "" else: # if not, try to find it in known locations discovered_file = _discover_ontology(ontology) if discovered_file: onto_name = os.path.split(discovered_file)[1] onto_path = discovered_file # I know, this sucks x = ontology.lower() if "http:" in x or "https:" in x or "ftp:" in x or "file:" in x: onto_url = ontology else: onto_url = "" else: # not found, look into the database (it is last because when bibclassify # runs in a standalone mode, it has no database - [rca, old-heritage]) if not bconfig.STANDALONE: result = dbquery.run_sql("SELECT name, location from clsMETHOD WHERE name LIKE %s", ('%'+ontology+'%',)) for onto_short_name, url in result: onto_name = onto_short_name onto_path = _get_remote_ontology(url) onto_url = url return (onto_name, onto_path, onto_url) def _discover_ontology(ontology_name): """ Looks for the file in known places (inside invenio/etc/bibclassify) and a few other places like the current directory @var ontology_name: string, name or path name or url @return: absolute path of a file if found, or None """ last_part = os.path.split(os.path.abspath(ontology_name))[1].lower() possible_patterns = [last_part + ".rdf", last_part] places = [config.CFG_CACHEDIR, config.CFG_ETCDIR, os.path.join(config.CFG_CACHEDIR, "bibclassify"), os.path.join(config.CFG_ETCDIR, "bibclassify"), os.path.abspath('.'), os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../etc/bibclassify")), os.path.join(os.path.dirname(__file__), "bibclassify"), config.CFG_WEBDIR ] log.debug("Searching for taxonomy using string: %s" % last_part) log.debug("Possible patterns: %s" % possible_patterns) for path in places: if os.path.isdir(path): log.debug("Listing: %s" % path) for filename in os.listdir(path): #log.debug('Testing: %s' % filename) for pattern in possible_patterns: filename_lc = filename.lower() if pattern == filename_lc and os.path.exists(os.path.join(path, filename)): filepath = os.path.abspath(os.path.join(path, filename)) if (os.access(filepath, os.R_OK)): log.debug("Found taxonomy at: %s" % filepath) return filepath else: log.warning('Found taxonomy at: %s, but it is not readable. Continuing to search...' % filepath) log.debug("No taxonomy with pattern '%s' found" % ontology_name) class KeywordToken: # this tells pickle that the class we are pickling is coming from # module 'bibclassify_ontology_reader' instead of invenio.bibclassify_ontology_reader #__module__ = os.path.splitext(os.path.basename(__file__))[0] def __init__(self, subject, store=None, namespace=None, type='HEP'): """KeywordToken is a class used for the extracted keywords. It can be initialized with values from an RDF store or from simple strings.
Specialty of this class is that objects are hashable by subject - so in the dictionary two objects with the same subject appears as one -- @see: self.__hash__ and self.__cmp__ @var subject: string or RDF object @keyword store: RDF graph object (will be used to get info about the subject) @keyword namespace: RDF namespace object, used together with store @keyword type: string, type of this keyword """ self.id = subject self.type = type self.short_id = subject self.concept = "" self.regex = [] self.nostandalone = False self.spires = False self.fieldcodes = [] self.compositeof = [] self.core = False self._composite = '#Composite' in subject # True means composite keyword self.__hash = None # the tokens are coming possibly from a normal text file if store is None: subject = subject.strip() self.concept = subject self.regex = _get_searchable_regex(basic=[subject]) self.nostandalone = False self.fieldcodes = [] self.core = False if subject.find(' ') > -1: self._composite = True # definitions from rdf else: self.short_id = self.short_id.split('#')[-1] #find alternate names for this label basic_labels = [] #turn those patterns into regexes only for simple keywords if self._composite is False: try: for label in store.objects(subject, namespace["prefLabel"]): basic_labels.append(str(label)) # XXX shall i make it unicode? except TypeError: pass self.concept = basic_labels[0] else: try: self.concept = str(store.value(subject, namespace["prefLabel"], any=True)) except KeyError: log.warning("Keyword with subject %s has no prefLabel. We use raw name" % self.short_id) self.concept = self.short_id # this is common both to composite and simple keywords try: for label in store.objects(subject, namespace["altLabel"]): basic_labels.append(str(label)) except TypeError: pass #hidden labels are special (possibly regex) codes hidden_labels = [] try: for label in store.objects(subject, namespace["hiddenLabel"]): hidden_labels.append(unicode(label)) except TypeError: pass # compile regular expression that will identify this token self.regex = _get_searchable_regex(basic_labels, hidden_labels) try: for note in map(lambda s: str(s).lower().strip(), store.objects(subject, namespace["note"])): if note == 'core': self.core = True elif note in ("nostandalone", "nonstandalone"): self.nostandalone = True elif 'fc:' in note: self.fieldcodes.append(note[3:].strip()) except TypeError: pass # spiresLabel does not have multiple values spires_label = store.value(subject, namespace["spiresLabel"]) if spires_label: self.spires = str(spires_label) # important for comparisons self.__hash = hash(self.short_id) # extract composite parts ids if store is not None and self.isComposite(): small_subject = self.id.split("#Composite.")[-1] component_positions = [] for label in store.objects(self.id, namespace["compositeOf"]): strlabel = str(label).split("#")[-1] component_name = label.split("#")[-1] component_positions.append((small_subject.find(component_name), strlabel)) component_positions.sort() if not component_positions: log.error("Keyword is marked as composite, but no composite components refs found: %s" \ % self.short_id) else: self.compositeof = map(lambda x: x[1], component_positions) def refreshCompositeOf(self, single_keywords, composite_keywords, store=None, namespace=None): """Re-checks sub-parts of this keyword - this should be called after the whole RDF was processed, because it is using a cache of single keywords and if that one is incomplete, you will not identify all parts """ def _get_ckw_components(new_vals, label): if 
label in single_keywords: new_vals.append(single_keywords[label]) elif ('Composite.%s' % label) in composite_keywords: for l in composite_keywords['Composite.%s' % label].compositeof: _get_ckw_components(new_vals, l) elif label in composite_keywords: for l in composite_keywords[label].compositeof: _get_ckw_components(new_vals, l) else: # One single or composite keyword is not present in the taxonomy. This is due to an error in the taxonomy description. log.error("The composite term \"%s\" should be made of single keywords, but at least one is missing" % self.id) if store is not None: log.error("Needed components: %s" % list(store.objects(self.id, namespace["compositeOf"]))) log.error("Missing is: %s" % label) raise Exception() if self.compositeof: new_vals = [] try: for label in self.compositeof: _get_ckw_components(new_vals, label) self.compositeof = new_vals except: # the composites will be empty (better than to have confusing, partial matches) self.compositeof = [] log.error('We reset this composite keyword, so that it does not match anything. Please fix the taxonomy.') def isComposite(self): return self._composite def getComponents(self): return self.compositeof def getType(self): return self.type def setType(self, value): self.type = value def __hash__(self): """This might change in the future, but for the moment we treat keywords with the same concept as identical - this sucks, but it is how it has to work for now""" return self.__hash def __cmp__(self, other): if self.__hash < other.__hash__(): return -1 elif self.__hash == other.__hash__(): return 0 else: return 1 def __str__(self, spires=False): """Returns the best output for the keyword.""" if spires: if self.spires: return self.spires elif self._composite: return self.concept.replace(':', ',') # default action return self.concept def output(self, spires=False): return self.__str__(spires=spires) def __repr__(self): return "<KeywordToken: %s>" % self.short_id def _build_cache(source_file, skip_cache=False): """Builds the cached data by parsing the RDF taxonomy file or a vocabulary file. @var source_file: source file of the taxonomy, RDF file @keyword skip_cache: boolean, if True, the built cache will not be saved (pickled) to disk """ if rdflib: if rdflib.__version__ >= '2.3.2': store = rdflib.ConjunctiveGraph() else: store = rdflib.Graph() else: store = None if skip_cache: log.info("You requested not to save the cache to disk.") else: cache_path = _get_cache_path(source_file) cache_dir = os.path.dirname(cache_path) # Make sure we have a cache_dir readable and writable. try: os.makedirs(cache_dir) except: pass if os.access(cache_dir, os.R_OK): if not os.access(cache_dir, os.W_OK): log.error("Cache directory exists but is not writable. Check your permissions for: %s" % cache_dir) raise Exception("Cache directory exists but is not writable.
Check your permissions for: %s" % cache_dir) else: log.error("Cache directory does not exist (and could not be created): %s" % cache_dir) raise Exception("Cache directory does not exist (and could not be created): %s" % cache_dir) timer_start = time.clock() namespace = None single_keywords, composite_keywords = {}, {} try: if not rdflib: raise ImportError() # will be caught below log.info("Building RDFLib's conjunctive graph from: %s" % source_file) try: store.parse(source_file) except urllib2.URLError, exc: if source_file[0] == '/': store.parse("file://" + source_file) else: store.parse("file:///" + source_file) except rdflib_exceptions_Error, e: log.error("Serious error reading RDF file") log.error(e) log.error(traceback.format_exc()) raise rdflib.exceptions.Error(e) except (xml.sax.SAXParseException, ImportError), e: # File is not a RDF file. We assume it is a controlled vocabulary. log.error(e) log.error("The ontology file is probably not a valid RDF file. \ Assuming it is a controlled vocabulary file.") filestream = open(source_file, "r") for line in filestream: keyword = line.strip() kt = KeywordToken(keyword) single_keywords[kt.short_id] = kt if not len(single_keywords): raise Exception('Probably a wrong dictionary') else: #ok, no exception happened log.info("Now building cache of keywords") # File is a RDF file. namespace = rdflib.Namespace("http://www.w3.org/2004/02/skos/core#") single_count = 0 composite_count = 0 for subject, pref_label in store.subject_objects(namespace["prefLabel"]): kt = KeywordToken(subject, store=store, namespace=namespace) if kt.isComposite(): composite_count += 1 composite_keywords[kt.short_id] = kt #log.info("saved composite: %s" % kt.short_id) else: single_keywords[kt.short_id] = kt single_count += 1 cached_data = {} cached_data["single"] = single_keywords cached_data["composite"] = composite_keywords cached_data["creation_time"] = time.gmtime() cached_data["version_info"] = {'rdflib': rdflib and rdflib.__version__, 'bibclassify': bconfig.VERSION} log.debug("Building taxonomy... %d terms built in %.1f sec." % (len(single_keywords) + len(composite_keywords), time.clock() - timer_start)) log.info("Total count of single keywords: %d " % len(single_keywords)) log.info("Total count of composite keywords: %d " % len(composite_keywords)) if not skip_cache: cache_path = _get_cache_path(source_file) cache_dir = os.path.dirname(cache_path) log.debug("Writing the cache into: %s" % cache_path) # test again, it could have changed if os.access(cache_dir, os.R_OK): if os.access(cache_dir, os.W_OK): # Serialize. filestream = None try: filestream = open(cache_path, "wb") except IOError, msg: # Impossible to write the cache. log.error("Impossible to write cache to '%s'." % cache_path) log.error(msg) else: log.debug("Writing cache to file %s" % cache_path) cPickle.dump(cached_data, filestream, 1) if filestream: filestream.close() else: raise Exception("Cache directory exists but is not writable. 
Check your permissions for: %s" % cache_dir) else: raise Exception("Cache directory does not exist (and could not be created): %s" % cache_dir) # now when the whole taxonomy was parsed, find sub-components of the composite kws # it is important to keep this call after the taxonomy was saved, because we don't # want to pickle regexes multiple times (as they are must be re-compiled at load time) for kt in composite_keywords.values(): kt.refreshCompositeOf(single_keywords, composite_keywords, store=store, namespace=namespace) # house-cleaning if store: store.close() return (single_keywords, composite_keywords) def _capitalize_first_letter(word): """Returns a regex pattern with the first letter accepting both lowercase and uppercase.""" if word[0].isalpha(): # These two cases are necessary in order to get a regex pattern # starting with '[xX]' and not '[Xx]'. This allows to check for # colliding regex afterwards. if word[0].isupper(): return "[" + word[0].swapcase() + word[0] +"]" + word[1:] else: return "[" + word[0] + word[0].swapcase() +"]" + word[1:] return word def _convert_punctuation(punctuation, conversion_table): """Returns a regular expression for a punctuation string.""" if punctuation in conversion_table: return conversion_table[punctuation] return re.escape(punctuation) def _convert_word(word): """Returns the plural form of the word if it exists, the word itself otherwise.""" out = None # Acronyms. if word.isupper(): out = word + "s?" # Proper nouns or word with digits. elif word.istitle(): out = word + "('?s)?" elif _contains_digit.search(word): out = word if out is not None: return out # Words with non or anti prefixes. if _starts_with_non.search(word): word = "non-?" + _capitalize_first_letter(_convert_word(word[3:])) elif _starts_with_anti.search(word): word = "anti-?" + _capitalize_first_letter(_convert_word(word[4:])) if out is not None: return _capitalize_first_letter(out) # A few invariable words. if word in bconfig.CFG_BIBCLASSIFY_INVARIABLE_WORDS: return _capitalize_first_letter(word) # Some exceptions that would not produce good results with the set of # general_regular_expressions. if word in bconfig.CFG_BIBCLASSIFY_EXCEPTIONS: return _capitalize_first_letter(bconfig.CFG_BIBCLASSIFY_EXCEPTIONS[word]) for regex in bconfig.CFG_BIBCLASSIFY_UNCHANGE_REGULAR_EXPRESSIONS: if regex.search(word) is not None: return _capitalize_first_letter(word) for regex, replacement in bconfig.CFG_BIBCLASSIFY_GENERAL_REGULAR_EXPRESSIONS: stemmed = regex.sub(replacement, word) if stemmed != word: return _capitalize_first_letter(stemmed) return _capitalize_first_letter(word + "s?") def _get_cache(cache_file, source_file=None): """Get the cached taxonomy using the cPickle module. No check is done at that stage. @var cache_file: fullpath to the file holding pickled data @keyword source_file: if we discover the cache is obsolete, we will build a new cache, therefore we need the source path of the cache @return: (single_keywords, composite_keywords)""" timer_start = time.clock() filestream = open(cache_file, "rb") try: #bibclassify_ontology_reader = sys.modules['bibclassify_ontology_reader'] cached_data = cPickle.load(filestream) if cached_data['version_info']['rdflib'] != (rdflib and rdflib.__version__) or \ cached_data['version_info']['bibclassify'] != bconfig.VERSION: raise KeyError except (cPickle.UnpicklingError, AttributeError, DeprecationWarning, EOFError), e: log.warning("The existing cache in %s is not readable. " "Removing and rebuilding it." 
% cache_file) filestream.close() os.remove(cache_file) return _build_cache(source_file) except KeyError: log.warning("The existing cache %s is not up-to-date. " "Removing and rebuilding it." % cache_file) filestream.close() os.remove(cache_file) if source_file and os.path.exists(source_file): return _build_cache(source_file) else: log.error("The cache contains obsolete data (and it was deleted), \ however I can't build a new cache, the source does not exist or is inaccessible! - %s" % source_file) filestream.close() single_keywords = cached_data["single"] composite_keywords = cached_data["composite"] # the cache contains only keys of the composite keywords, not the objects # so now let's resolve them into objects for kw in composite_keywords.values(): kw.refreshCompositeOf(single_keywords, composite_keywords) log.debug("Retrieved taxonomy from cache %s created on %s" % (cache_file, time.asctime(cached_data["creation_time"]))) log.debug("%d terms read in %.1f sec." % (len(single_keywords) + len(composite_keywords), time.clock() - timer_start)) return (single_keywords, composite_keywords) def _get_cache_path(source_file): """Returns the path where the cache of this taxonomy should be written/located @var onto_name: name of the ontology or the full path @return: string, abs path to the cache file in the tmpdir/bibclassify """ local_name = os.path.basename(source_file) cache_name = local_name + ".db" cache_dir = os.path.join(config.CFG_CACHEDIR, "bibclassify") return os.path.abspath(os.path.join(cache_dir, cache_name)) def _get_last_modification_date(url): """Get the last modification date of the ontology.""" request = urllib2.Request(url) request.get_method = lambda: "HEAD" - http_file = urllib2.urlopen(request) + http_file = urlopen(request) date_string = http_file.headers["last-modified"] parsed = time.strptime(date_string, "%a, %d %b %Y %H:%M:%S %Z") return datetime(*(parsed)[0:6]) def _download_ontology(url, local_file): """Downloads the ontology and stores it in CFG_CACHEDIR.""" log.debug("Copying remote ontology '%s' to file '%s'." % (url, local_file)) try: - url_desc = urllib2.urlopen(url) + url_desc = urlopen(url) file_desc = open(local_file, 'w') file_desc.write(url_desc.read()) file_desc.close() except IOError, e: print e return False except: log.warning("Unable to download the ontology. '%s'" % sys.exc_info()[0]) return False else: log.debug("Done copying.") return True def _get_searchable_regex(basic=None, hidden=None): """Returns the searchable regular expressions for the single keyword.""" # Hidden labels are used to store regular expressions. basic = basic or [] hidden = hidden or [] hidden_regex_dict = {} for hidden_label in hidden: if _is_regex(hidden_label): hidden_regex_dict[hidden_label] = \ re.compile(bconfig.CFG_BIBCLASSIFY_WORD_WRAP % hidden_label[1:-1]) else: pattern = _get_regex_pattern(hidden_label) hidden_regex_dict[hidden_label] = \ re.compile(bconfig.CFG_BIBCLASSIFY_WORD_WRAP % pattern) # We check if the basic label (preferred or alternative) is matched # by a hidden label regex. If yes, discard it. regex_dict = {} # Create regex for plural forms and add them to the hidden labels. for label in basic: pattern = _get_regex_pattern(label) regex_dict[label] = re.compile(bconfig.CFG_BIBCLASSIFY_WORD_WRAP % pattern) # Merge both dictionaries. 
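# Note: because of the update() below, a pattern compiled from a hidden
# label overrides the pattern compiled from a basic label when both were
# derived from the same label string.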
regex_dict.update(hidden_regex_dict) return regex_dict.values() def _get_regex_pattern(label): """Returns a regular expression of the label that takes care of plural and different kinds of separators.""" parts = _split_by_punctuation.split(label) for index, part in enumerate(parts): if index % 2 == 0: # Word if not parts[index].isdigit() and len(parts[index]) > 1: parts[index] = _convert_word(parts[index]) else: # Punctuation if not parts[index + 1]: # The separator is not followed by another word. Treat # it as a symbol. parts[index] = _convert_punctuation(parts[index], bconfig.CFG_BIBCLASSIFY_SYMBOLS) else: parts[index] = _convert_punctuation(parts[index], bconfig.CFG_BIBCLASSIFY_SEPARATORS) return "".join(parts) def _is_regex(string): """Checks if a concept is a regular expression.""" return string[0] == "/" and string[-1] == "/" def check_taxonomy(taxonomy): """Checks the consistency of the taxonomy and outputs a list of errors and warnings.""" if not rdflib: raise Exception("The taxonomy checking is possible only with RDFLIB") log.info("Building graph with Python RDFLib version %s" % rdflib.__version__) if rdflib.__version__ >= '2.3.2': store = rdflib.ConjunctiveGraph() else: store = rdflib.Graph() try: store.parse(taxonomy) except: log.error("The taxonomy is not a valid RDF file. Are you " "trying to check a controlled vocabulary?") raise Exception('Error in RDF file') log.info("Graph was successfully built.") prefLabel = "prefLabel" hiddenLabel = "hiddenLabel" altLabel = "altLabel" composite = "composite" compositeOf = "compositeOf" note = "note" both_skw_and_ckw = [] # Build a dictionary we will reason on later. uniq_subjects = {} for subject in store.subjects(): uniq_subjects[subject] = None subjects = {} for subject in uniq_subjects: strsubject = str(subject).split("#Composite.")[-1] strsubject = strsubject.split("#")[-1] if (strsubject == "http://cern.ch/thesauri/HEPontology.rdf" or strsubject == "compositeOf"): continue components = {} for predicate, value in store.predicate_objects(subject): strpredicate = str(predicate).split("#")[-1] strobject = str(value).split("#Composite.")[-1] strobject = strobject.split("#")[-1] components.setdefault(strpredicate, []).append(strobject) if strsubject in subjects: both_skw_and_ckw.append(strsubject) else: subjects[strsubject] = components log.info("Taxonomy contains %s concepts." % len(subjects)) no_prefLabel = [] multiple_prefLabels = [] bad_notes = [] # Subjects with no composite or compositeOf predicate lonely = [] both_composites = [] bad_hidden_labels = {} bad_alt_labels = {} # Problems with composite keywords composite_problem1 = [] composite_problem2 = [] composite_problem3 = [] composite_problem4 = {} composite_problem5 = [] composite_problem6 = [] stemming_collisions = [] interconcept_collisions = {} for subject, predicates in subjects.iteritems(): # No prefLabel or multiple prefLabels try: if len(predicates[prefLabel]) > 1: multiple_prefLabels.append(subject) except KeyError: no_prefLabel.append(subject) # Lonely and both composites. 
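# ('Lonely' concepts carry neither a composite nor a compositeOf link;
# concepts carrying both at once are collected separately below.)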
if not composite in predicates and not compositeOf in predicates: lonely.append(subject) elif composite in predicates and compositeOf in predicates: both_composites.append(subject) # Multiple or bad notes if note in predicates: bad_notes += [(subject, n) for n in predicates[note] if n not in ('nostandalone', 'core')] # Bad hidden labels if hiddenLabel in predicates: for lbl in predicates[hiddenLabel]: if lbl.startswith("/") ^ lbl.endswith("/"): bad_hidden_labels.setdefault(subject, []).append(lbl) # Bad alt labels if altLabel in predicates: for lbl in predicates[altLabel]: if len(re.findall("/", lbl)) >= 2 or ":" in lbl: bad_alt_labels.setdefault(subject, []).append(lbl) # Check composite if composite in predicates: for ckw in predicates[composite]: if ckw in subjects: if compositeOf in subjects[ckw]: if not subject in subjects[ckw][compositeOf]: composite_problem3.append((subject, ckw)) else: if not ckw in both_skw_and_ckw: composite_problem2.append((subject, ckw)) else: composite_problem1.append((subject, ckw)) # Check compositeOf if compositeOf in predicates: for skw in predicates[compositeOf]: if skw in subjects: if composite in subjects[skw]: if not subject in subjects[skw][composite]: composite_problem6.append((subject, skw)) else: if not skw in both_skw_and_ckw: composite_problem5.append((subject, skw)) else: composite_problem4.setdefault(skw, []).append(subject) # Check for stemmed labels if compositeOf in predicates: labels = (altLabel, hiddenLabel) else: labels = (prefLabel, altLabel, hiddenLabel) patterns = {} for label in [lbl for lbl in labels if lbl in predicates]: for expression in [expr for expr in predicates[label] if not _is_regex(expr)]: pattern = _get_regex_pattern(expression) interconcept_collisions.setdefault(pattern, []).append((subject, label)) if pattern in patterns: stemming_collisions.append((subject, patterns[pattern], (label, expression) )) else: patterns[pattern] = (label, expression) print "\n==== ERRORS ====" if no_prefLabel: print "\nConcepts with no prefLabel: %d" % len(no_prefLabel) print "\n".join([" %s" % subj for subj in no_prefLabel]) if multiple_prefLabels: print ("\nConcepts with multiple prefLabels: %d" % len(multiple_prefLabels)) print "\n".join([" %s" % subj for subj in multiple_prefLabels]) if both_composites: print ("\nConcepts with both composite properties: %d" % len(both_composites)) print "\n".join([" %s" % subj for subj in both_composites]) if bad_hidden_labels: print "\nConcepts with bad hidden labels: %d" % len(bad_hidden_labels) for kw, lbls in bad_hidden_labels.iteritems(): print " %s:" % kw print "\n".join([" '%s'" % lbl for lbl in lbls]) if bad_alt_labels: print "\nConcepts with bad alt labels: %d" % len(bad_alt_labels) for kw, lbls in bad_alt_labels.iteritems(): print " %s:" % kw print "\n".join([" '%s'" % lbl for lbl in lbls]) if both_skw_and_ckw: print ("\nKeywords that are both skw and ckw: %d" % len(both_skw_and_ckw)) print "\n".join([" %s" % subj for subj in both_skw_and_ckw]) print if composite_problem1: print "\n".join(["SKW '%s' references an unexisting CKW '%s'." % (skw, ckw) for skw, ckw in composite_problem1]) if composite_problem2: print "\n".join(["SKW '%s' references a SKW '%s'." % (skw, ckw) for skw, ckw in composite_problem2]) if composite_problem3: print "\n".join(["SKW '%s' is not composite of CKW '%s'." 
% (skw, ckw) for skw, ckw in composite_problem3]) if composite_problem4: for skw, ckws in composite_problem4.iteritems(): print "SKW '%s' does not exist but is " "referenced by:" % skw print "\n".join([" %s" % ckw for ckw in ckws]) if composite_problem5: print "\n".join(["CKW '%s' references a CKW '%s'." % kw for kw in composite_problem5]) if composite_problem6: print "\n".join(["CKW '%s' is not composed by SKW '%s'." % kw for kw in composite_problem6]) print "\n==== WARNINGS ====" if bad_notes: print ("\nConcepts with bad notes: %d" % len(bad_notes)) print "\n".join([" '%s': '%s'" % note for note in bad_notes]) if stemming_collisions: print ("\nFollowing keywords have unnecessary labels that have " "already been generated by BibClassify.") for subj in stemming_collisions: print " %s:\n %s\n and %s" % subj print "\nFinished." sys.exit(0) def test_cache(taxonomy_name='HEP', rebuild_cache=False, no_cache=False): cache = get_cache(taxonomy_name) if not cache: set_cache(taxonomy_name, get_regular_expressions(taxonomy_name, rebuild=rebuild_cache, no_cache=no_cache)) cache = get_cache(taxonomy_name) return (thread.get_ident(), cache) log.info('Loaded ontology reader') if __name__ == '__main__': test_cache() diff --git a/modules/bibclassify/lib/bibclassify_text_extractor.py b/modules/bibclassify/lib/bibclassify_text_extractor.py index b3e418742..24eec9826 100644 --- a/modules/bibclassify/lib/bibclassify_text_extractor.py +++ b/modules/bibclassify/lib/bibclassify_text_extractor.py @@ -1,173 +1,179 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibClassify text extractor. This module provides method to extract the fulltext from local or remote documents. Currently 2 formats of documents are supported: PDF and text documents. 2 methods provide the functionality of the module: text_lines_from_local_file and text_lines_from_url. This module also provides the utility 'is_pdf' that uses GNU file in order to determine if a local file is a PDF file. This module is STANDALONE safe """ import os import re import sys import tempfile import urllib2 import bibclassify_config as bconfig +if bconfig.STANDALONE: + from urllib2 import urlopen +else: + from urlutils import make_invenio_opener + urlopen = make_invenio_opener('BibClassify').open + log = bconfig.get_logger("bibclassify.text_extractor") _ONE_WORD = re.compile("[A-Za-z]{2,}") def text_lines_from_local_file(document, remote=False): """Returns the fulltext of the local file. @var document: fullpath to the file that should be read @var remote: boolean, if True does not count lines (gosh!) 
@return: list of lines if the text was read, or an empty list""" # FIXME - this does not care if we open anything, including binary files try: if is_pdf(document): if not executable_exists("pdftotext"): log.error("pdftotext is not available on the system.") cmd = "pdftotext -q -enc UTF-8 %s -" % re.escape(document) filestream = os.popen(cmd) else: filestream = open(document, "r") except IOError, ex1: log.error("Unable to read from file %s. (%s)" % (document, ex1.strerror)) return [] # FIXME - we assume it is utf-8 encoded / that is not good lines = [line.decode("utf-8", 'replace') for line in filestream] filestream.close() if not _is_english_text('\n'.join(lines)): log.warning("It seems the file '%s' is invalid and doesn't " "contain text. Please communicate this file to the Invenio " "team." % document) line_nb = len(lines) word_nb = 0 for line in lines: word_nb += len(re.findall("\S+", line)) # Discard lines that do not contain at least one word. lines = [line for line in lines if _ONE_WORD.search(line) is not None] if not remote: log.info("Local file has %d lines and %d words." % (line_nb, word_nb)) return lines def _is_english_text(text): """ Checks whether a text looks like correct English. Computes the number of words in the text and compares it to the expected number of words (based on an average word size of 5.1 letters). @param text: the text to analyze @type text: string @return: True if the text is English, False otherwise @rtype: Boolean """ # Consider one word and one space. avg_word_length = 5.1 + 1 expected_word_number = float(len(text)) / avg_word_length words = [word for word in re.split('\W', text) if word.isalpha()] word_number = len(words) return word_number > .5 * expected_word_number def text_lines_from_url(url, user_agent=""): """Returns the fulltext of the file found at the URL.""" request = urllib2.Request(url) if user_agent: request.add_header("User-Agent", user_agent) try: - distant_stream = urllib2.urlopen(request) + distant_stream = urlopen(request) # Write the URL content to a temporary file. local_file = tempfile.mkstemp(prefix="bibclassify.")[1] local_stream = open(local_file, "w") local_stream.write(distant_stream.read()) local_stream.close() except: log.error("Unable to read from URL %s." % url) return None else: # Read lines from the temporary file. lines = text_lines_from_local_file(local_file, remote=True) os.remove(local_file) line_nb = len(lines) word_nb = 0 for line in lines: word_nb += len(re.findall("\S+", line)) log.info("Remote file has %d lines and %d words." % (line_nb, word_nb)) return lines def executable_exists(executable): """Tests if an executable is available on the system.""" for directory in os.getenv("PATH").split(":"): if os.path.exists(os.path.join(directory, executable)): return True return False def is_pdf(document): """Checks if a document is a PDF file. Returns True if it is.""" if not executable_exists('file'): log.warning("GNU file was not found on the system. " "Switching to a weak file extension test.") if document.lower().endswith(".pdf"): return True return False # Tested with file version >= 4.10. First test is secure and works # with file version 4.25. Second condition is tested for file # version 4.10. file_output = os.popen('file ' + re.escape(document)).read() try: filetype = file_output.split(":")[1] except IndexError: log.error("Your version of the 'file' utility seems to " "be unsupported.
Please report this to cds.support@cern.ch.") raise Exception('Incompatible pdftotext') pdf = filetype.find("PDF") > -1 # This is how it should be done however this is incompatible with # file version 4.10. #os.popen('file -bi ' + document).read().find("application/pdf") return pdf diff --git a/modules/bibencode/lib/bibencode_tester.py b/modules/bibencode/lib/bibencode_tester.py index 5d7678aaa..cf6621243 100644 --- a/modules/bibencode/lib/bibencode_tester.py +++ b/modules/bibencode/lib/bibencode_tester.py @@ -1,397 +1,400 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Unit tests for BibEncode. * Please run conversion_for_unit_tests.py before you run the tests for the first time! """ __revision__ = "$Id$" import unittest from invenio import bibencode_utils from invenio import bibencode_encode from invenio import bibencode_metadata import invenio.config from invenio.bibencode_encode import encode_video from invenio.bibencode_extract import extract_frames from invenio.textutils import wait_for_user +from invenio.urlutils import make_invenio_opener from os.path import basename import os from urlparse import urlsplit import shutil import urllib2 from invenio.testutils import make_test_suite, run_test_suite +BIBENCODE_OPENER = make_invenio_opener('BibEncode') + ## original URL video_url = "http://media.xiph.org/video/derf/y4m/blue_sky_1080p25.y4m" video01 = invenio.config.CFG_TMPDIR + "/blue_sky_1080p25.y4m" video01_out01 = invenio.config.CFG_TMPDIR + "/blue_sky_1080p.mp4" video01_out02 = invenio.config.CFG_TMPDIR + "/blue_sky_1080p.ogg" video01_out03 = invenio.config.CFG_TMPDIR + "/blue_sky_1080p.webm" video01_out04 = invenio.config.CFG_TMPDIR + "/blue_sky_720p.mp4" video01_out05 = invenio.config.CFG_TMPDIR + "/blue_sky_720p.ogg" video01_out06 = invenio.config.CFG_TMPDIR + "/blue_sky_720p.webm" video01_out07 = invenio.config.CFG_TMPDIR + "/blue_sky_480p.mp4" video01_out08 = invenio.config.CFG_TMPDIR + "/blue_sky_480p.ogg" video01_out09 = invenio.config.CFG_TMPDIR + "/blue_sky_480p.webm" movie_no_aspect = invenio.config.CFG_TMPDIR + "/blue_sky_1080p_anamorphic.webm" metadata = { "title": "Super Duper Difficult Test Metadata Video File", "author": "Invenio Author", "album_artist": "Invenio Album Artist", "album": "Invenio Album", "grouping": "Invenio Grouping", "composter": "Invenio Composer", "year": "2011", "track": "42", "comment": "Invenio Comment", "genre": "Invenio Genre", "copyright": "Invenio Copyright", "description": "Invenio Description", "synopsis": "Invenio Synopsis", "show": "Invenio Show", "episode_id": "S04x42", "network": "Invenio Network", "lyrics": "Invenio Lyrics", } def url2name(url): return basename(urlsplit(url)[2]) def download(url, localFileName = None): """ Downloads a file from a remote url """ localName = url2name(url) req = 
urllib2.Request(url) - r = urllib2.urlopen(req) + r = BIBENCODE_OPENER.open(req) if r.info().has_key('Content-Disposition'): # If the response has Content-Disposition, we take file name from it localName = r.info()['Content-Disposition'].split('filename=')[1] if localName[0] == '"' or localName[0] == "'": localName = localName[1:-1] elif r.url != url: # if we were redirected, the real file name we take from the final URL localName = url2name(r.url) if localFileName: # we can force to save the file as specified name localName = localFileName f = open(localName, 'wb') shutil.copyfileobj(r, f) f.close() def printr(message): """ Print with carriage return """ print("\r" + message) class SetupTester(unittest.TestCase): """Prepares the necessary files for the tests""" def test_setUp(self): if not os.path.exists(video01): print("Downloading sample video ... ") download(video_url, video01) print("Starting encoding ... ") self.assertEqual(encode_video(video01, video01_out01, "libfaac", "libx264", 128000, 8000000, "1920x1080", 1, "-vpre medium", metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out02, "libvorbis", "libtheora", 128000, 8000000, "1920x1080", 1, metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out03, "libvorbis", "libvpx", 128000, 8000000, "1920x1080", 1, "-g 320 -qmax 63", metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out04, "libfaac", "libx264", 128000, 4000000, "1280x720", 1, "-vpre medium", metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out05, "libvorbis", "libtheora", 128000, 4000000, "1280x720", 1, metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out06, "libvorbis", "libvpx", 128000, 4000000, "1280x720", 1, "-g 320 -qmax 63", metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out07, "libfaac", "libx264", 128000, 2000000, "852x480", 1, "-vpre medium", metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out08, "libvorbis", "libtheora", 128000, 2000000, "854x480", 1, metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, video01_out09, "libvorbis", "libvpx", 128000, 2000000, "852x480", 1, "-g 320 -qmax 63", metadata=metadata, update_fnc=printr), 1) self.assertEqual(encode_video(video01, movie_no_aspect, "libvorbis", "libvpx", 128000, 8000000, "1440x1080", 1, "-g 320 -qmax 63", metadata=metadata, update_fnc=printr), 1) print("Starting frame extraction ...") self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes1_", size=None, positions=None, numberof=10, extension='jpg', width=None, height=None, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes2_", size="640x360", positions=None, numberof=10, extension='jpg', width=None, height=None, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes3_", size=None, positions=None, numberof=10, extension='jpg', width=640, height=None, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes4_", size=None, positions=None, numberof=10, extension='jpg', width=None, 
height=360, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes5_", size=None, positions=None, numberof=10, extension='jpg', width=640, height=360, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes6_", size=None, positions=[1, 5, 10, 15, 20], numberof=None, extension='jpg', width=None, height=None, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes7_", size=None, positions=["00:00:01.00", "00:00:02.00","00:00:03.00", "00:00:04.00", "00:00:05.00"], numberof=None, extension='jpg', width=None, height=None, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) self.assertEqual(extract_frames(video01_out01, output_file=invenio.config.CFG_TMPDIR + "/testframes8_", size=None, positions=["00:00:01.00", 5,"00:00:03.00", 10, "00:00:05.00"], numberof=None, extension='jpg', width=None, height=None, aspect=None, profile=None, update_fnc=printr, message_fnc=printr), 1) print("All done") class TestFFmpegMinInstallation(unittest.TestCase): """Tests if the minimum FFmpeg installation is available""" def test_ffmpeg(self): self.assertEqual(bibencode_utils.check_ffmpeg_configuration(), None) class TestUtilsFunctions(unittest.TestCase): """Tests the utility functions in bibencode_utils""" def test_timcode_to_seconds(self): """Convert timecode to seconds""" self.assertEqual(bibencode_utils.timecode_to_seconds("00:00:00"),0.0) self.assertEqual(bibencode_utils.timecode_to_seconds("00:00:00.00"),0.0) self.assertEqual(bibencode_utils.timecode_to_seconds("00:00:00.10"),0.1) self.assertEqual(bibencode_utils.timecode_to_seconds("00:00:01.00"),1.0) self.assertEqual(bibencode_utils.timecode_to_seconds("00:00:00.01"),0.01) self.assertEqual(bibencode_utils.timecode_to_seconds("00:00:10"),10.0) self.assertEqual(bibencode_utils.timecode_to_seconds("00:10:10"),610.0) self.assertEqual(bibencode_utils.timecode_to_seconds("10:10:10"),36610.0) self.assertEqual(bibencode_utils.timecode_to_seconds("10:10:10.10"),36610.10) def test_seconds_to_timecode(self): """Convert seconds to timecode""" self.assertEqual(bibencode_utils.seconds_to_timecode(0.0),"00:00:00.00") self.assertEqual(bibencode_utils.seconds_to_timecode(0.1),"00:00:00.10") self.assertEqual(bibencode_utils.seconds_to_timecode(1.0),"00:00:01.00") self.assertEqual(bibencode_utils.seconds_to_timecode(1.1),"00:00:01.10") self.assertEqual(bibencode_utils.seconds_to_timecode(10.0),"00:00:10.00") self.assertEqual(bibencode_utils.seconds_to_timecode(610.0),"00:10:10.00") self.assertEqual(bibencode_utils.seconds_to_timecode(36610.0),"10:10:10.00") self.assertEqual(bibencode_utils.seconds_to_timecode(36610.10),"10:10:10.10") self.assertEqual(bibencode_utils.seconds_to_timecode(36601.10),"10:10:01.10") self.assertEqual(bibencode_utils.seconds_to_timecode(36600.10),"10:10:00.10") self.assertEqual(bibencode_utils.seconds_to_timecode("36600.10"),"10:10:00.10") def test_is_seconds(self): """Tests if given value is seconds like""" self.assertEqual(bibencode_utils.is_seconds(1), True) self.assertEqual(bibencode_utils.is_seconds(1.1), True) self.assertEqual(bibencode_utils.is_seconds("1"), True) self.assertEqual(bibencode_utils.is_seconds("1.1"), True) self.assertEqual(bibencode_utils.is_seconds("11.11"), True) 
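# Values carrying a unit suffix or empty strings must be rejected: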
self.assertEqual(bibencode_utils.is_seconds("1s"), False) self.assertEqual(bibencode_utils.is_seconds("1.1s"), False) self.assertEqual(bibencode_utils.is_seconds(""), False) def test_is_timecode(self): """Test if given value is a timecode""" self.assertEqual(bibencode_utils.is_timecode("00:00:00"), True) self.assertEqual(bibencode_utils.is_timecode("00:00:00.00"), True) self.assertEqual(bibencode_utils.is_timecode("00:00:00.0"), True) self.assertEqual(bibencode_utils.is_timecode("00:00:00.000"), True) self.assertEqual(bibencode_utils.is_timecode("00:00:0.0"), False) self.assertEqual(bibencode_utils.is_timecode("00:00"), False) self.assertEqual(bibencode_utils.is_timecode("00:00.00"), False) self.assertEqual(bibencode_utils.is_timecode("00"), False) self.assertEqual(bibencode_utils.is_timecode("0"), False) self.assertEqual(bibencode_utils.is_timecode("00.00"), False) self.assertEqual(bibencode_utils.is_timecode("0.0"), False) def test_aspect_string_to_float(self): """Tests if string contains an aspect ratio""" self.assertAlmostEqual(bibencode_utils.aspect_string_to_float("4:3"), 1.333, places=2) self.assertAlmostEqual(bibencode_utils.aspect_string_to_float("16:9"), 1.777, places=2) class TestEncodeFunctions(unittest.TestCase): """Tests the functions of bibencode_encode""" def test_determine_aspect(self): """Tests if the aspect is correctly detected""" self.assertEqual(bibencode_encode.determine_aspect(video01_out02), ("16:9", 1920, 1080)) self.assertEqual(bibencode_encode.determine_aspect(video01_out05), ("16:9", 1280, 720)) self.assertEqual(bibencode_encode.determine_aspect(video01_out08), ("427:240", 854, 480)) def test_determine_resolution(self): """Tests if the resolution is correctly calculated""" # The aspect is fully detectable in the video self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1920, 1080, None), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1280, 720, None), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 854, 480, None), "854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1920, None, None), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1280, None, None), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 854, None, None), "854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, None, 1080, None), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, None, 720, None), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, None, 480, None), "854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1920, 1080, 1.777), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1280, 720, 1.777), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 854, 480, 1.78), "854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1920, None, 1.777), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 1280, None, 1.777), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, 854, None, 1.78), 
"854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, None, 1080, 1.777), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, None, 720, 1.777), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(video01_out03, None, 480, 1.78), "854x480") # The aspect is not detectable in the video self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1920, 1080, None), "1440x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1280, 720, None), "960x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 854, 480, None), "640x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1920, None, None), "1920x1440") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1280, None, None), "1280x960") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 854, None, None), "854x640") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, None, 1080, None), "1440x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, None, 720, None), "960x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, None, 480, None), "640x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1920, 1080, 1.777), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1280, 720, 1.777), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 854, 480, 1.78), "854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1920, None, 1.777), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1280, None, 1.777), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 854, None, 1.78), "854x480") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, None, 1080, 1.777), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, None, 720, 1.777), "1280x720") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, None, 480, 1.78), "854x480") # Alternative aspect notation self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1920, 1080, "16:9"), "1920x1080") self.assertEqual(bibencode_encode.determine_resolution_preserving_aspect(movie_no_aspect, 1920, 1080, "4:3"), "1440x1080") def test_assure_quality(self): """ Test if the quality is detected correctly""" self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1920, 1080, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1280, 720, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 4443, 2500, 6000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1280, 720, 10000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1920, 1080, 10000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 
None, 1920, 1080, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, None, 720, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, None, 2500, 6000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, None, 720, 10000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, None, 1080, 10000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1920, None, None, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1280, None, None, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 4443, None, None, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, None, None, 10000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, None, None, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 800, 600, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, "4:3", 800, 600, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, "4:3", 1440, 1080, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 1.333333333333333333, 800, 600, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 1.333333333333333333, 1440, 1080, 6000000, True, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 1.333, 800, 600, 6000000, True, 0.95), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 1.333, 1440, 1080, 6000000, True, 0.95), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 800, 600, 6000000, True, 0.95), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, None, 1440, 1080, 6000000, True, 0.95), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 1.333, 800, 600, 6000000, False, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(video01_out03, 1.333, 1440, 1080, 6000000, False, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(movie_no_aspect, None, 800, 600, 6000000, False, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(movie_no_aspect, None, 1440, 1080, 6000000, False, 1.0), True) self.assertEqual(bibencode_encode.assure_quality(movie_no_aspect, None, 1920, 1080, 6000000, False, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(movie_no_aspect, None, 1920, 1080, 6000000, True, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(movie_no_aspect, "16:9", 1920, 1080, 6000000, False, 1.0), False) self.assertEqual(bibencode_encode.assure_quality(movie_no_aspect, "16:9", 1920, 1080, 6000000, True, 1.0), True) class TestExtractFunctions(unittest.TestCase): """Tests the functions of bibencode_extract""" pass class TestMetadataFunctions(unittest.TestCase): """Tests the functions of bibencode_metadata""" def test_ffrobe_metadata(self): """Test if ffprobe metadata outputs correctly""" metadata_check = { 'format': {'TAG:album': '"Invenio Album"', 'TAG:album_artist': '"Invenio Album Artist"', 'TAG:comment': '"Invenio Comment"', 'TAG:compatible_brands': 'isomiso2avc1mp41', 'TAG:copyright': '"Invenio Copyright"', 'TAG:creation_time': '1970-01-01 00:00:00', 'TAG:description': '"Invenio Description"', 'TAG:encoder': 'Lavf53.1.0', 
'TAG:episode_id': '"S04x42"', 'TAG:genre': '"Invenio Genre"', 'TAG:grouping': '"Invenio Grouping"', 'TAG:lyrics': '"Invenio Lyrics"', 'TAG:major_brand': 'isom', 'TAG:minor_version': '512', 'TAG:network': '"Invenio Network"', 'TAG:show': '"Invenio Show"', 'TAG:synopsis': '"Invenio Synopsis"', 'TAG:title': '"Super Duper Difficult Test Metadata Video File"', 'bit_rate': '7606651.000000 ', 'duration': '10.000000 ', 'filename': '/home/oldi/videos/park_joy_1080p.mp4', 'format_long_name': 'QuickTime/MPEG-4/Motion JPEG 2000 format', 'format_name': 'mov,mp4,m4a,3gp,3g2,mj2', 'nb_streams': '1', 'size': '9508314.000000 ', 'start_time': '0.000000 '}, 'streams': [{'TAG:creation_time': '1970-01-01 00:00:00', 'TAG:language': 'und', 'avg_frame_rate': '50/1', 'codec_long_name': 'H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10', 'codec_name': 'h264', 'codec_tag': '0x31637661', 'codec_tag_string': 'avc1', 'codec_time_base': '1/100', 'codec_type': 'video', 'display_aspect_ratio': '30:17', 'duration': '10.000000 ', 'has_b_frames': '2', 'height': '1088', 'index': '0', 'nb_frames': '500', 'pix_fmt': 'yuv420p', 'r_frame_rate': '50/1', 'sample_aspect_ratio': '1:1', 'start_time': '0.000000 ', 'time_base': '1/50', 'width': '1920'}]} self.assertEqual(bibencode_metadata.ffprobe_metadata(video01_out01), metadata_check) class TestBatchEngineFunctions(unittest.TestCase): """Tests the functions of bibencode_batch_engine""" pass class TestDaemonFunctions(unittest.TestCase): """Tests the functions of bibencode_daemon""" pass TEST_SUITE = make_test_suite(SetupTester, TestUtilsFunctions, TestEncodeFunctions, TestExtractFunctions, ## TestMetadataFunctions, TestBatchEngineFunctions, TestDaemonFunctions) if __name__ == "__main__": wait_for_user(""" ####################################################### # This is the test suite for the BibEncode module # # # # You need to have installed ffmpeg with H.264, WebM # # and Theora support! Please see the manual! # # # # Please be aware that not every aspect can be tested # # due to the nature of video encoding and wrapping # # external libraries like ffmpeg. The results should # # only be seen as an indicator and do not necessarily # # mean that there is something wrong. # # # # You should evaluate the output manually in the tmp # # folder of your Invenio installation # # # # The test suite will download and create several # # gigabytes of video material to perform the test! # # The whole test might take up to half an hour # # # # Do you wish to continue? Then enter "Yes, I know!". # # Else press 'ctrl + c' to leave this tool. # ####################################################### """) run_test_suite(TEST_SUITE) diff --git a/modules/bibformat/lib/elements/bfe_issn.py index 86d282502..aa617cf82 100644 --- a/modules/bibformat/lib/elements/bfe_issn.py +++ b/modules/bibformat/lib/elements/bfe_issn.py @@ -1,1082 +1,1084 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details.
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Print ISSN corresponding to given journal name """ __revision__ = "$Id$" import pprint -import urllib import sys import re import getopt from invenio.search_engine import perform_request_search from invenio.search_engine_utils import get_fieldvalues from invenio.config import CFG_CERN_SITE +from invenio.urlutils import make_invenio_opener + +BIBFORMAT_OPENER = make_invenio_opener('BibFormat') if CFG_CERN_SITE: journal_name_tag = '773__p' else: journal_name_tag = '909C4p' issns = { 'aapps bull.': '0218-2203', 'account. manag. inf. technol.': '0959-8022', 'acm comput. surv.': '0360-0300', 'acm sigplan not.': '0362-1340', 'acm trans. comput. syst.': '0734-2071', 'acm trans. comput.-hum. interact.': '1073-0516', 'acm trans. database syst.': '0362-5915', 'acm trans. graph.': '0730-0301', 'acm trans. inf. syst. secur.': '1094-9224', 'acm trans. internet technol.': '1533-5399', 'acm trans. math. softw.': '0098-3500', 'acm trans. program. lang. syst.': '0164-0925', 'acm trans. storage': '1553-3077', 'acta appl. math.': '1572-9036', 'acta arith.': '1730-6264', 'acta fac. rerum nat. univ. comen.: math.': '0373-8183', 'acta math. appl. sin.': '1618-3932', 'acta math. hung.': '1588-2632', 'acta mech. sin.': '1614-3116', 'acta mech. solida sin.': '1860-2134', 'acta phys. pol. a': '0587-4246', 'ad hoc netw.': '1570-8705', 'adsorption': '1572-8757', 'adv. cement based mater.': '1065-7355', 'adv. colloid interface sci.': '0001-8686', 'adv. compos. mater.': '1568-5519', 'adv. eng. inform.': '1474-0346', 'adv. eng. mater.': '1527-2648', 'adv. geom.': '1615-715X', 'adv. mater.': '1521-4095', 'adv. math.': '0001-8708', 'adv. perform. mater.': '1572-8765', 'adv. powder technol.': '1568-5527', 'adv. robot.': '1568-5535', 'air space eur.': '1290-0958', 'algebr. represent. theory': '1572-9079', 'anal. math.': '0133-3852', 'analog integr. circuits signal process.': '0925-1030', 'angew. chem.': '1521-3757', 'angew. chem. int. ed.': '1521-3773', 'ann. glob. anal. geom.': '0232-704X', 'ann. inst. stat. math.': '0020-3157', 'ann. mat. pura appl.': '1618-1891', 'ann. math. artif. intell.': '1012-2443', 'ann. oper. res.': '1572-9338', 'ann. phys. (san diego)': '0003-4916', 'ann. phys. (weinheim)': '1521-3889', 'ann. pol. math.': '1730-6272', 'ann. sci. ec. norm. sup\xc3\xa9r.': '0012-9593', 'ann. softw. eng.': '1573-7489', 'annu. rev. nucl. part. sci.': '0163-8998', 'appl. compos. mater.': '0929-189X', 'appl. intell.': '0924-669X', 'appl. math.': '0862-7940', 'appl. math. electron. notes': '1607-2510', 'appl. phys.': '0340-3793', 'appl. soft comput.': '1568-4946', 'appl. stoch. models bus. ind.': '1526-4025', 'appl. supercond.': '0964-1807', 'appl. surf. sci.': '0378-5963', 'approx. theory. appl.': '1000-9221', 'arch. mus. inform.': '1573-7500', 'arch. sci.': '1573-7519', 'ariadne': '1361-3200', 'artif. intell. rev.': '0269-2821', 'artif. life robot.': '1614-7456', 'asimmetrie': '1827-1383', 'astron. educ. rev.': '1539-1515', 'astron. q.': '0364-9229', 'astrophys. space sci.': '1572-946X', 'astrophys. space sci. trans.': '1810-6536', 'astrophysics': '1573-8191', 'at. data nucl. data tables': '0092-640X', 'at. energy': '1573-8205', 'atom': '0004-7015', 'autom. remote control.': '0005-1179', 'autom. softw. eng.': '0928-8910', 'auton. agents multi-agent syst.': '1387-2532', 'auton. 
robots': '0929-5593', 'banach cent. publ.': '1730-6299', 'ber. bunsenges. phys. chem.': '0005-9021', 'ber. wiss.gesch.': '1522-2365', 'bioelectromagnetics': '1521-186X', 'biom. j.': '1521-4036', 'biomed. eng.': '0006-3398', 'biophys. chem.': '0301-4622', 'biosens. bioelecton.': '0956-5663', 'bol. asoc. math. venez.': '1315-4125', 'br. j. appl. phys.': '0508-3443', 'bt technol. j.': '1358-3948', 'bulg. j. phys.': '1310-0157', 'bull. earthq. eng.': '1573-1456', 'bull. soc. math. fr.': '0037-9484', 'bull. union phys.': '0366-3878', 'bus. strategy environ.': '1099-0836', 'c. r. acad. sci., 2b': '1620-7742', 'c. r. mech.': '1631-0721', 'c. r. phys.': '1631-0705', 'can. j. electr. comput. eng.': '0840-8688', 'cas. pest. mat.': '0862-7959', 'catal. lett.': '1572-879X', 'celest. mech. dyn. astron.': '1572-9478', 'chem. mater.': '1520-5002', 'chem. vap. depos.': '1521-3862', 'chemphyschem': '1439-7641', 'chin. astron.': '0146-6364', 'chin. librariansh.': '1089-4667', 'chin. opt. lett.': '1671-7694', 'chin. phys.': '1741-4199', 'chin. phys. lett.': '0256-307X', 'circuit world': '0305-6120', 'circuits syst. signal process.': '1531-5878', 'clean technol. environ. policy': '1618-9558', 'clefs cea': '0298-6248', 'clin. phys. physiol. meas.': '0143-0815', 'cluster comput.': '1386-7857', 'coastal eng.': '0378-3839', 'colloid j.': '1608-3067', 'colloq. math.': '1730-6310', 'comments mod. phys.': '1560-5892', 'commun. acm': '0001-0782', 'commun. nonlinear sci. numer. simul.': '1007-5704', 'commun. pure appl. math.': '1097-0312', 'commun. soc.: dig. news events interest commun. eng.': '0094-5579', 'complexity': '1099-0526', 'compos. math.': '0010-437X', 'composites': '0010-4361', 'comput. archit. news': '0163-5964', 'comput. bull.': '0010-4531', 'comput. chem. eng.': '0098-1354', 'comput. commun. rev.': '0146-4833', 'comput. graph.': '0097-8930', 'comput. humanit.': '1572-8412', 'comput. ind. eng.': '0360-8352', 'comput. integr. manuf. syst.': '0951-5240', 'comput. math. model.': '1046-283X', 'comput. math. organ. theory': '1381-298X', 'comput. netw. isdn syst.': '0169-7552', 'comput. optim. appl.': '0926-6003', 'comput. phys. rep.': '0167-7977', 'comput. soc.': '0095-2737', 'comput. softw.': '0289-6540', 'comput. speech lang.': '0885-2308', 'comput. support. coop. work': '0925-9724', 'comput. vis. image underst.': '1077-3142', 'computer': '0018-9162', 'concurr. comput.: pract. exp.': '1532-0634', 'concurr.: pract. exp.': '1096-9128', 'constraints': '1572-9354', 'contact context': '1547-8890', 'contrib. plasma phys.': '1521-3986', 'cosm. res.': '0010-9525', 'cost eng. j.': '0274-9626', 'cryst. growth des.': '1528-7483', 'cryst. res. technol.': '1521-4079', 'cultiv. interact.': '1471-3225', 'curr. appl. phys.': '1567-1739', 'curr. opin. colloid. interface sci.': '1359-0294', 'cybermetrics': '1137-5019', 'cybern. syst. anal.': '1060-0396', 'czechoslov. j. phys.': '1572-9486', 'czechoslov. math. j.': '0011-4642', 'data base adv. inf. syst.': '0095-0033', 'data min. knowl. discov.': '1384-5810', 'data sci. j.': '1683-1470', 'des. autom. embed. syst.': '1572-8080', 'des. codes cryptogr.': '0925-1022', 'des. monomers polym.': '1568-5551', 'differ. equ.': '0012-2116', 'differ. geom.': '1454-511X', 'digit. signal process.': '1051-2004', 'discrete event dyn. syst.': '1573-7594', 'distrib. parallel databases': '0926-8272', 'documentaliste': '0012-4508', 'dokl. phys. chem.': '0012-5016', 'dyn. control': '1573-8450', 'e-polymers': '1618-7229', 'e-streams': '1098-4399', 'earth moon planets': '0167-9295', 'ec compet. 
policy newsl.': '1025-2266', 'educ. inf. technol.': '1360-2357', 'educ. stud. math.': '1573-0816', 'egypt. j. solids': '1012-5566', 'electrodepos. surf. treat.': '0300-9416', 'electron microsc. rev.': '0892-0354', 'electron. j. comb.': '1027-5487', 'electron. j. theor. phys.': '1729-5254', 'electron. libr.': '0264-0473', 'electron. res. announc. am. math. soc.': '1079-6762', 'electron. trans. artif. intell.': '1403-204X', 'empir. softw. eng.': '1382-3256', 'entropy': '1099-4300', 'environ. qual. manag.': '1520-6483', 'environmetrics': '1099-095X', 'epj a direct': '1435-3725', 'epj c direct': '1435-3725', 'epj e direct': '1435-3725', 'eso astrophys. symp.': '1611-6143', 'ethics inf. technol.': '1572-8439', 'etri j.': '1225-6463', 'eur. environ.': '1099-0976', 'eur. j. solid state inorg. chem.': '0992-4361', 'eur. trans. electr. power': '1546-3109', 'eur. union': '1472-3395', 'eurasip j. wirel. commun. netw.': '1687-1499', 'exergy': '1164-0235', 'exp. astron.': '1572-9508', 'extremes': '1572-915X', 'fire technol.': '1572-8099', 'fluid dyn.': '1573-8507', 'form. methods syst. des.': '1572-8102', 'forschung': '1522-2357', 'fortran forum': '1061-7264', 'fortschr. phys.': '1521-3978', 'found. phys.': '0015-9018', 'found. phys. lett.': '0894-9875', 'free online scholarsh. newsl.': '1535-7848', 'freepint newsl.': '1460-7239', 'frontiers (swindon)': '1460-5600', 'fuel cells': '1615-6854', 'funct. anal. appl.': '0016-2663', 'fundam. math.': '1730-6329', 'fuzzy optim. decis. mak.': '1573-2908', 'gaz. vide': '1638-802X', 'gen. relativ. gravit.': '1572-9532', 'geoinformatica': '1384-6175', 'germ. res.': '1522-2322', 'glass ceram.': '1573-8515', 'gps solut.': '1521-1886', 'graph. models image process.': '1077-3169', 'heat recovery syst. chp': '0890-4332', 'high energy chem.': '1608-3148', 'high energy density phys.': '1574-1818', 'high energy phys. nucl. phys. (beijing)': '0254-3052', 'high temp.': '0018-151X', 'hit j. sci. eng.': '1565-5008', 'icarus': '0019-1035', 'icsti forum': '1018-9580', 'ieee aerosp. electron. syst. mag.': '0885-8985', 'ieee ann. hist. comput.': '1058-6180', 'ieee antennas propag. mag.': '1045-9243', 'ieee antennas wirel. propag. lett.': '1536-1225', 'ieee assp mag.': '0740-7467', 'ieee circuits devices mag.': '8755-3996', 'ieee circuits syst. mag.': '1531-636X', 'ieee commun. lett.': '1089-7798', 'ieee commun. mag.': '0163-6804', 'ieee comput. appl. power': '0895-0156', 'ieee comput. graph. appl.': '0272-1716', 'ieee comput. sci. eng.': '1070-9924', 'ieee concurr.': '1092-3063', 'ieee control syst. mag.': '0272-1708', 'ieee des. test comput.': '0740-7475', 'ieee distrib. syst. online': '1541-4922', 'ieee electr. insul. mag.': '0883-7554', 'ieee electron device lett.': '0741-3106', 'ieee eng. med. biol. mag.': '0739-5175', 'ieee expert mag.': '0885-9000', 'ieee instrum. measur. mag.': '1094-6969', 'ieee intell. syst.': '1541-1672', 'ieee intell. syst. appl.': '1094-7167', 'ieee internet comput.': '1089-7801', 'ieee j. ocean. eng.': '0364-9059', 'ieee j. quantum electron.': '0018-9197', 'ieee j. robot. autom.': '0882-4967', 'ieee j. sel. areas commun.': '0733-8716', 'ieee j. sel. top. quantum electron.': '1077-260X', 'ieee j. solid state circuits': '0018-9200', 'ieee lcs': '1045-9235', 'ieee lts': '1055-6877', 'ieee micro': '0272-1732', 'ieee microw. guid. wave lett.': '1051-8207', 'ieee microw. mag.': '1527-3342', 'ieee microw. wirel. compon. lett.': '1531-1309', 'ieee multimed.': '1070-986X', 'ieee netw.': '0890-8044', 'ieee parallel distrib. technol.: syst. 
appl.': '1063-6552', 'ieee pers. commun.': '1070-9916', 'ieee pervasive comput.': '1536-1268', 'ieee photonics technol. lett.': '1041-1135', 'ieee potentials': '0278-6648', 'ieee power electron. lett.': '1540-7985', 'ieee power energy mag.': '1540-7977', 'ieee power eng. rev.': '0272-1724', 'ieee robot. autom. mag.': '1070-9932', 'ieee secur. priv. mag.': '1540-7993', 'ieee sens. j.': '1530-437X', 'ieee signal process. lett.': '1070-9908', 'ieee signal process. mag.': '1053-5888', 'ieee softw.': '0740-7459', 'ieee spectr.': '0018-9235', 'ieee technol. soc. mag.': '0278-0097', 'ieee trans. acoust. speech signal process.': '0096-3518', 'ieee trans. adv. packag.': '1521-3323', 'ieee trans. aerosp. electron. syst.': '0018-9251', 'ieee trans. antennas propag.': '0018-926X', 'ieee trans. appl. supercond.': '1051-8223', 'ieee trans. audio': '0096-1620', 'ieee trans. audio electroacoust.': '0018-9278', 'ieee trans. audio speech lang. process.': '1558-7916', 'ieee trans. autom. sci. eng.': '1545-5955', 'ieee trans. automat. control': '0018-9286', 'ieee trans. biomed. eng.': '0018-9294', 'ieee trans. broadcast.': '0018-9316', 'ieee trans. circuits syst.': '0098-4094', 'ieee trans. circuits syst. video technol.': '1051-8215', 'ieee trans. circuits syst., i': '1057-7122', 'ieee trans. circuits syst., ii': '1057-7130', 'ieee trans. commun.': '0090-6778', 'ieee trans. compon. hybrids manuf. technol.': '0148-6411', 'ieee trans. compon. packag. manuf. technol. a': '1070-9886', 'ieee trans. compon. packag. manuf. technol. b': '1070-9894', 'ieee trans. compon. packag. manuf. technol. c': '1083-4400', 'ieee trans. compon. packag. technol.': '1521-3331', 'ieee trans. compon. parts': '0097-6601', 'ieee trans. comput.': '0018-9340', 'ieee trans. comput.-aided des. integrat. circuits syst.': '0278-0070', 'ieee trans. consum. electron.': '0098-3063', 'ieee trans. control syst. technol.': '1063-6536', 'ieee trans. dependable secur. comput.': '1545-5971', 'ieee trans. device mater. reliab.': '1530-4388', 'ieee trans. dielectr. electr. insul.': '1070-9878', 'ieee trans. educ.': '0018-9359', 'ieee trans. electr. insul.': '0018-9367', 'ieee trans. electromagn. compat.': '0018-9375', 'ieee trans. electron devices': '0018-9383', 'ieee trans. electron. packag. manuf.': '1521-334X', 'ieee trans. energy convers.': '0885-8969', 'ieee trans. eng. manag.': '0018-9391', 'ieee trans. evol. comput.': '1089-778X', 'ieee trans. fuzzy syst.': '1063-6706', 'ieee trans. geosci. remote sens.': '0196-2892', 'ieee trans. image process.': '1057-7149', 'ieee trans. ind. appl.': '0093-9994', 'ieee trans. ind. electron.': '0278-0046', 'ieee trans. ind. inform.': '1551-3203', 'ieee trans. inf. technol. biomed.': '1089-7771', 'ieee trans. inf. theory': '0018-9448', 'ieee trans. instrum. meas.': '0018-9456', 'ieee trans. intell. transp. syst.': '1524-9050', 'ieee trans. knowl. data eng.': '1041-4347', 'ieee trans. magn.': '0018-9464', 'ieee trans. manuf. technol.': '0046-838X', 'ieee trans. med. imaging': '0278-0062', 'ieee trans. microw. theory tech.': '0018-9480', 'ieee trans. mob. comput.': '1536-1233', 'ieee trans. multimed.': '1520-9210', 'ieee trans. nanobiosci.': '1536-1241', 'ieee trans. nanotechnol.': '1536-125X', 'ieee trans. neural netw.': '1045-9227', 'ieee trans. neural syst. rehabil. eng.': '1534-4320', 'ieee trans. nucl. sci.': '0018-9499', 'ieee trans. parallel distrib. syst.': '1045-9219', 'ieee trans. parts hybrids packag.': '0361-1000', 'ieee trans. parts mater. packag.': '0018-9502', 'ieee trans. pattern anal. mach. 
intell.': '0162-8828', 'ieee trans. plasma sci.': '0093-3813', 'ieee trans. power deliv.': '0885-8977', 'ieee trans. power electron.': '0885-8993', 'ieee trans. power syst.': '0885-8950', 'ieee trans. prod. eng. prod.': '0097-4544', 'ieee trans. prof. commun.': '0361-1434', 'ieee trans. rehabil. eng.': '1063-6528', 'ieee trans. reliab.': '0018-9529', 'ieee trans. robot.': '1552-3098', 'ieee trans. robot. autom.': '1042-296X', 'ieee trans. semicond. manuf.': '0894-6507', 'ieee trans. signal process.': '1053-587X', 'ieee trans. softw. eng.': '0098-5589', 'ieee trans. sonics ultrason.': '0018-9537', 'ieee trans. speech audio process.': '1063-6676', 'ieee trans. syst. man cybern.': '0018-9472', 'ieee trans. syst. man cybern. a': '1083-4427', 'ieee trans. syst. man cybern. b': '1083-4419', 'ieee trans. syst. man cybern. c': '1094-6977', 'ieee trans. ultrason. eng.': '0893-6706', 'ieee trans. ultrason., ferroelectr. freq. control': '0885-3010', 'ieee trans. veh. technol.': '0018-9545', 'ieee trans. very large scale integr. (vlsi) syst.': '1063-8210', 'ieee trans. vis. comput. graph.': '1077-2626', 'ieee trans. wirel. commun.': '1536-1276', 'ieee wirel. commun.': '1536-1284', 'ieee/acm trans. netw.': '1063-6692', 'ieee/asme trans. mechatron.': '1083-4435', 'iii-vs rev.': '0961-1290', 'inf. bull. var. stars': '1587-2440', 'inf. manag.': '0378-7206', 'inf. organ.': '1471-7727', 'inf. process. manag.': '0306-4573', 'inf. res.': '1368-1613', 'inf. retr.': '1386-4564', 'inf. sci. appl.': '1069-0115', 'inf. syst. e-bus. manag.': '1617-9854', 'inf. syst. front.': '1387-3326', 'inf. technol. disabil.': '1073-5727', 'inf. technol. manag.': '1385-951X', 'infeuro': '1027-930X', 'infrared phys.': '0020-0891', 'innov. syst. softw. eng.': '1614-5054', 'innov. teach. learn. inf. comput. sci.': '1473-1707', 'innov. technol. transf.': '1013-6452', 'innov. transf. technol.': '1025-692X', 'inorg. mater.': '1608-3172', 'instrum. exp. tech.': '0020-4412', 'int. appl. mech.': '1573-8582', 'int. insolv. rev.': '1099-1107', 'int. j. appl. electromagn. mech.': '1383-5416', 'int. j. appl. math. comput. sci.': '1641-876X', 'int. j. appl. radiat. isot.': '0020-708X', 'int. j. comput. math. learn.': '1382-3892', 'int. j. comput. vis.': '0920-9429', 'int. j. des. comput.': '1329-7147', 'int. j. electron. commun.': '1434-8411', 'int. j. electron. commun. (aeu)': '1434-8411', 'int. j. fract.': '0376-9429', 'int. j. hum.-comput. stud.': '1071-5819', 'int. j. infrared millim. waves': '1572-9559', 'int. j. intell. syst.': '1098-111X', 'int. j. mass spectrom.': '1387-3806', 'int. j. mass spectrom. ion process.': '0168-1176', 'int. j. mod. phys. d': '0218-2718', 'int. j. mod. phys. e': '0218-3013', 'int. j. parallel program.': '0885-7458', 'int. j. pattern recognit. artif. intell.': '0218-0014', 'int. j. prod. econ.': '0925-5273', 'int. j. radiat. appl. instrum. a': '0883-2889', 'int. j. radiat. appl. instrum. d': '1359-0189', 'int. j. radiat. phys. chem. (1969-76)': '0020-7055', 'int. j. radiat. phys. chem., c': '1359-0197', 'int. j. rock mech. min. sci.': '1365-1609', 'int. j. technol. des. educ.': '0957-7572', 'int. j. theor. phys.': '1572-9575', 'int. j. therm. sci.': '1290-0729', 'int. j. thermophys.': '1572-9567', 'int. j. wirel. inf. netw.': '1068-9605', 'intel. artif.': '1137-3601', 'interact. multimed. electron. j. comput. enhanc. learn.': '1525-9102', 'interface sci.': '0927-7056', 'ipn sci.': '1622-5120', 'ire trans. audio': '0096-1981', 'ire trans. autom. control': '0096-199X', 'ire trans. 
circuit theory': '0098-4094', 'ire trans. compon. parts': '0096-2422', 'ire trans. prod. eng. prod.': '0096-1779', 'ire trans. prod. tech.': '0096-1760', 'ire trans. ultrason. eng.': '0096-1019', 'it archit.': '1557-2145', 'it prof.': '1520-9202', 'itbm-rbm': '1297-9562', 'itbm-rbm news': '1297-9570', 'itnow': '1746-5702', 'j. acm assoc. comput. mach.': '0004-5411', 'j. adhes. sci. technol.': '1568-5616', 'j. algebr. comb.': '0925-9899', 'j. am. soc. inf. sci.': '1097-4571', 'j. am. soc. inf. sci. technol.': '1532-2890', 'j. anal. chem.': '1608-3199', 'j. appl. clin. med. phys.': '1526-9914', 'j. appl. electrochem.': '0021-891X', 'j. appl. mech. tech. phys.': '1573-8620', 'j. appl. spectrosc.': '1573-8647', 'j. artif. intell. res.': '1076-9757', 'j. astrophys. astron.': '0250-6335', 'j. autom. reason.': '0168-7433', 'j. biomater. sci., polym. ed.': '1568-5624', 'j. braz. comput. soc.': '0104-6500', 'j. chem. doc.': '1961-1974', 'j. chem. eng. data': '1520-5134', 'j. chemom.': '1099-128X', 'j. colloid interface sci.': '0021-9797', 'j. comput. aided mater. des.': '0928-1045', 'j. comput. anal. appl.': '1521-1398', 'j. comput. electron.': '1569-8025', 'j. comput. neurosci.': '0929-5313', 'j. comput. phys.': '0021-9991', 'j. comput. sci. technol.': '1860-4749', 'j. comput.- mediat. commun.': '1083-6101', 'j. corros. sci. eng.': '1466-8858', 'j. cosmol. astropart. phys.': '1475-7516', 'j. data sci.': '1683-8602', 'j. des. commun.': '1137-3601', 'j. digit. inf.': '1368-7506', 'j. disp. technol.': '1551-319X', 'j. dyn. control syst.': '1079-2724', 'j. dyn. differ. equ.': '1040-7294', 'j. elast.': '1573-2681', 'j. electroceram.': '1385-3449', 'j. electromagn. waves appl.': '1569-3937', 'j. electron. test.': '0923-8174', 'j. eng. math.': '0022-0833', 'j. eng. phys. thermophys.': '1573-871X', 'j. fluids struct.': '0889-9746', 'j. fourier anal. appl.': '1531-5851', 'j. fusion energy': '1572-9591', 'j. geophys. eng.': '1742-2132', 'j. glob. optim.': '0925-5001', 'j. grid comput.': '1572-9814', 'j. heuristics': '1381-1231', 'j. high energy phys.': '1126-6708', 'j. instrum.': '1748-0221', 'j. intell. inf. syst.': '0925-9902', 'j. intell. manuf.': '1572-8145', 'j. intell. robot. syst.': '1573-0409', 'j. interlibr. loan doc. deliv. electron. reserve': '1072-303X', 'j. jpn. stat. soc.': '1348-6365', 'j. lightwave technol.': '0733-8724', 'j. log. algebr. program.': '1567-8326', 'j. log. lang. inf.': '1572-9583', 'j. low temp. phys.': '1573-7357', 'j. magn. reson.': '1090-7807', 'j. magn. reson. a': '1064-1858', 'j. magn. reson. b': '1064-1866', 'j. magn. reson. imag.': '1522-2586', 'j. mater. cycles waste manag.': '1611-8227', 'j. mater. sci.': '0022-2461', 'j. mater. sci. lett.': '0261-8028', 'j. mater. sci.: mater. electron.': '0957-4522', 'j. mater. sci.: mater. med.': '0957-4530', 'j. mater. synth. process.': '1573-4870', 'j. math. imaging vis.': '0924-9907', 'j. math. model. algorithms': '1570-1166', 'j. math. sci.': '1072-3374', 'j. math. teach. educ.': '1573-1820', 'j. microcomput. appl.': '0745-7138', 'j. microelectromech. syst.': '1057-7157', 'j. micromechatron.': '1568-5632', 'j. nanomater.': '1687-4129', 'j. nanopart. res.': '1572-896X', 'j. netw. comput. appl.': '1084-8045', 'j. netw. syst. manag.': '1064-7570', 'j. neural eng.': '1741-2560', 'j. non-newton. fluid mech.': '0377-0257', 'j. nondestruct. eval.': '0195-9298', 'j. nucl. energy, c': '0368-3281', 'j. object technol.': '1660-1769', 'j. oper. manage.': '0272-6963', 'j. opt.': '0150-536X', 'j. opt. fiber commun. rep.': '1619-8638', 'j. 
opt. netw.': '1536-5379', 'j. opt. technol.': '1070-9762', 'j. optim. theory appl.': '0022-3239', 'j. parallel distrib. comput.': '0743-7315', 'j. phys. c': '0022-3719', 'j. phys. chem. a': '0092-7325', 'j. phys. chem. b': '0092-7325', 'j. phys. f': '0305-4608', 'j. phys. stud.': '1027-4642', 'j. phys.: conf. ser.': '1742-6596', 'j. polym. res.': '1022-9760', 'j. porous mater.': '1573-4854', 'j. product. anal.': '1573-0441', 'j. radiat. res.': '0449-3060', 'j. radioanal. nucl. chem.': '1588-2780', 'j. res. natl. inst. stand. technol.': '1044-677X', 'j. res. pract.': '1712-851X', 'j. russ. laser res.': '1573-8760', 'j. sci. commun.': '1824-2049', 'j. sci. comput.': '0885-7474', 'j. sci. instrum.': '0950-7671', 'j. soc. radiol. prot.': '0260-2814', 'j. softw. maint. evol.: res. pract.': '1532-0618', 'j. softw. maint.: res. pract.': '1096-908X', 'j. sound vib.': '0022-460X', 'j. south. acad. spec. librariansh.': '1525-321X', 'j. stat. mech., theory exp.': '1742-5468', 'j. stat. phys.': '1572-9613', 'j. stat. softw.': '1548-7660', 'j. strain anal. eng. des.': '0309-3247', 'j. supercomput.': '0920-8542', 'j. supercond.': '1572-9605', 'j. supercond. novel magn.': '1557-1939', 'j. supercrit. fluids': '0896-8446', 'j. syst. integr.': '1573-8787', 'j. technol. transf.': '0829-9912', 'j. theor. probab.': '0894-9840', 'j. therm. anal. calorim.': '1572-8943', 'j. vis. commun. image represent.': '1047-3203', 'j. vis. comput. animat.': '1099-1778', 'j. vlsi signal process. syst. signal image video technol.': '0922-5773', 'jpn. j. appl. phys.': '1347-4065', 'k-theory': '1573-0514', 'katharine sharp rev.': '1083-5261', 'kek news': '1343-3547', 'lasers med. sci.': '1435-604X', 'lett. math. phys.': '1573-0530', 'libr. philos. pract.': '1522-0222', 'linux j.': '1075-3583', 'lith. math. j.': '0363-1672', 'living rev. sol. phys.': '1614-4961', 'low temp. phys.': '1063-777X', 'mach. learn.': '0885-6125', 'macromol. chem. phys.': '1521-3935', 'macromol. mater. eng.': '1439-2054', 'macromol. rapid commun.': '1521-3927', 'macromol. symp.': '1521-3900', 'macromol. theory simul.': '1521-3919', 'magma magn. reson. mater. phys. biol. med.': '1352-8661', 'magn. reson. imaging': '0730-725X', 'mater. sci.': '1068-820X', 'mater. technol.': '1580-3414', 'math. notes': '0001-4346', 'math. phys. anal. geom.': '1385-0172', 'math. probl. eng.': '1563-5147', 'math. scand.': '0025-5521', 'mc j.': '1069-6792', 'meas. tech.': '0543-1972', 'meccanica': '1572-9648', 'mech. compos. mater.': '1573-8922', 'mech. syst. signal process.': '0888-3270', 'mech. time-depend. mater.': '1573-2738', 'med. phys.': '0094-2405', 'mediterr. j. math.': '1660-5454', 'met. sci. heat treat.': '0026-0673', 'metallurgist': '1573-8892', 'methodol. comput. appl. probab.': '1387-5841', 'metrika': '1436-5057', 'metrologia': '1681-7575', 'microfluid. nanofluid.': '1613-4990', 'micromater. nanomater.': '1619-2486', 'micron': '0968-4328', 'micron (1969-83) [online version]': '0047-7206', 'micron microsc. acta': '0739-6260', 'microw. rf': '0745-2993', 'milan j. math.': '1424-9294', 'minds mach.': '1572-8641', 'minerva': '0026-4695', 'mo. j. math. sci.': '0899-6180', 'mob. netw. appl.': '1572-8153', 'mol. eng.': '1572-8951', 'monogr. mat.': '0077-0507', 'monte carlo methods appl.': '1569-3961', 'mrs bull.': '0883-7694', 'multibody syst. dyn.': '1573-272X', 'multidimens. syst. signal process.': '0923-6082', 'multimed. tools appl.': '1380-7501', 'm\xc3\xa9c. ind.': '1296-2139', 'nagoya math. j.': '0027-7630', 'netw. 
comput.': '1046-4468', 'networks': '1097-0037', 'neural process. lett.': '1370-4621', 'neutron news': '1044-8632', 'new dir. high. educ.': '1522-2322', 'new dir. instit. res.': '1536-075X', 'new dir. stud. serv.': '1536-0695', 'new dir. teach. learn.': '1536-0768', 'nexus netw. j.': '1522-4600', 'nonlinear dyn.': '1573-269X', 'nonlinear phenom. complex syst.': '1561-4085', 'nonprofit couns.': '1520-6785', 'not. am. math. soc.': '1088-9477', 'nouv. rev. opt.': '0335-7368', 'nouv. rev. opt. appl.': '0029-4780', 'ntm int. j. hist. ethics nat. sci. technol. med.': '1420-9144', 'nucl. data sheets': '0090-3752', 'nucl. data sheets, a': '0550-306X', 'nucl. data sheets, b': '0090-550X', 'nucl. eng. des. fusion': '0167-899X', 'nucl. eng. technol.': '1738-5733', 'nucl. fusion': '0029-5515', 'nucl. instrum.': '0369-643X', 'nucl. instrum. methods': '0029-554X', 'nucl. instrum. methods phys. res.': '0167-5087', 'nucl. instrum. methods phys. res., a': '0167-5087', 'nucl. phys.': '0029-5582', 'nucl. phys. news': '1050-6896', 'nucl. struct. eng.': '0369-5816', 'nucl. track detect.': '0145-224X', 'nucl. tracks': '0735-245X', 'nucl. tracks radiat. meas.': '0191-278X', 'nucl. tracks radiat. meas. (1982-85)': '0735-245X', 'nucl. tracks radiat. meas. (1993)': '0969-8078', 'nukleonika': '1508-5791', 'numer. algorithms': '1572-9265', 'numer. methods partial differ. equ.': '1098-2426', 'nuovo cimento, riv.': '0393-697X', 'ocean dyn.': '1616-7228', 'open syst. inf. dyn.': '1230-1612', 'oper. syst. rev.': '0163-5980', 'opt. fiber technol.': '1068-5200', 'opt. netw. mag.': '1572-8161', 'opt. photonics news': '1047-6938', 'opt. quantum electron.': '0306-8919', 'opt. rev.': '1349-9432', 'opt. spectrosc.': '1562-6911', 'opt. switch. netw.': '1573-4277', 'opt. technol.': '0374-3926', 'optik': '0030-4026', 'optim. control appl. methods': '1099-1514', 'optim. eng.': '1389-4420', 'oxid. met.': '0030-770X', 'packag. technol. sci.': '1099-1522', 'pamm': '1617-7061', 'part. part. syst. charact.': '1521-4117', 'period. math. hung.': '1588-2829', 'pers. technol.': '1433-3066', 'pers. ubiquitous comput.': '1617-4917', 'philips j. res.': '0165-5817', 'photonic netw. commun.': '1572-8188', 'photonics nanostruct., fundam. appl.': '1569-4410', 'phys. biol.': '1478-3975', 'phys. earth planet. inter.': '0031-9201', 'phys. fluids (1958-88)': '0031-9171', 'phys. j.': '0031-9279', 'phys. j. indones. phys. soc.': '1410-8860', 'phys. lett.': '0031-9163', 'phys. lett., a': '0375-9601', 'phys. lett., b': '0370-2693', 'phys. life rev.': '1571-0645', 'phys. rev.': '0031-899X', 'phys. rev. (ser. i)': '0031-899X', 'phys. rev. lett.': '0031-9007', 'phys. rev. spec. top. phys. educ. res.': '1554-9178', 'phys. rev., a': '1050-2947;', 'phys. rev., b': '0163-1829', 'phys. rev., c': '0556-2813', 'phys. rev., d': '0556-2821', 'phys. rev., e': '1063-651x', 'phys. status solidi, c': '1610-1642', 'phys. technol.': '0305-4624', 'phys. unserer zeit': '1521-3943', 'physica': '0031-8914', 'physica b c': '0378-4363', 'plasma chem. plasma process.': '1572-8986', 'plasma phys.': '0032-1028', 'plasmas ions': '1288-3255', 'plasmas polym.': '1572-8978', 'poiesis prax.': '1615-6617', 'polym. gels netw.': '0966-7822', 'powder metall. met. ceram.': '1068-1302', 'power technol. eng.': '1570-1468', 'prace mat.- fiz.': '0867-5570', 'probab. surv.': '1549-5787', 'probl. inf. transm.': '0032-9460', 'proc. ieee': '0018-9219', 'proc. indian acad. sci., math. sci.': '0253-4142', 'proc. jpn. acad. a': '0386-2194', 'proc. phys. soc. (1926-48)': '0959-5309', 'proc. phys. soc. 
(1958-67)': '0370-1328', 'proc. phys. soc. lond.': '1478-7814', 'proc. phys. soc., a': '0370-1298', 'proc. phys. soc., b': '0370-1301', 'prog. cryst. growth charact.': '0146-3535', 'prog. nucl. magn. reson. spectrosc.': '0079-6565', 'prog. phys.': '1555-5615', 'prog. theor. phys., suppl.': '0375-9687', 'progr. struct. eng. mater.': '1528-2716', 'program. comput. softw.': '0361-7688', 'propellants explos. pyrotech.': '1521-4087', 'prot. met.': '0033-1732', 'publ. math. ihes': '1618-1913', 'public underst. sci.': '1361-6609', 'pure appl. opt.': '0963-9659', 'qual. assur. j.': '1099-1786', 'qual. reliab. eng. int.': '1099-1638', 'quant. financ.': '1469-7688', 'quantum inf. process.': '1570-0755', 'quantum opt.': '0954-8998', 'quantum semiclass. opt.': '1355-5111', 'queueing syst.': '1572-9443', 'radiat. phys. chem. (1977-85)': '0146-5724', 'radiochemistry': '1608-3288', 'radioisotopes': '0033-8303', 'radiophys. quantum electron.': '1573-9120', 'radioprotection': '1769-700X', 'ramanujan j.': '1572-9303', 'rbm-news': '0222-0776', 'real time imaging': '1077-2014', 'real time syst.': '0922-6443', 'refract. ind. ceram.': '1083-4877', 'reliab. comput.': '1385-3139', 'rend. lincei': '1720-0768', 'rend. lincei sci. fis. nat.': '1720-0776', 'rend. semin. mat.': '0373-1243', 'res. inf.': '1744-8026', 'res. lang. comput.': '1572-8706', 'res. nondestruct. eval.': '1432-2110', 'rev. electron. suisse sci. inf.': '1661-1802', 'rev. g\xc3\xa9n. therm.': '0035-3159', 'rev. mex. fis.': '0035-001X', 'rev. phys. chim. appl. ind.': '1153-9771', 'rheol. acta': '1435-1528', 'risonyt': '0108-0350', 'rom. rep. phys.': '1221-1451', 'rozpr. mat.': '0860-2581', 'russ. j. electrochem.': '1023-1935', 'russ. j. nondestruct. test.': '1061-8309', 'russ. j. numer. anal. math. model.': '1569-3988', 'russ. microelectron.': '1063-7397', 'russ. phys. j.': '1573-9228', 's. afr. j. inf. manag.': '1560-683X', 'sankhya. indian j. stat.': '0036-4452', 'sci. am.': '0036-8733', 'sci. avenir': '0036-8636', 'sci. educ.': '1098-237X', 'sci. soils': '1432-9492', 'sci. vie hors s\xc3\xa9r.': '0151-0282', 'scientometrics': '0138-9130', 'semicond. int.': '0163-3767', 'semicond. phys. quantum electron. optoelectron.': '1605-6582', 'semigroup forum': '0037-1912', 'sens. actuators a': '0924-4247', 'sens. actuators b': '0925-4005', 'sens. update': '1616-8984', 'sensors': '1424-8220', 'ser. enews': '1476-0576', 'serials': '0953-0460', 'sib. math. j.': '0037-4466', 'sigact news': '0163-5700', 'sigbio newsl.': '0163-5697', 'sigcse bull.': '0097-8418', 'sigsam bull.': '0163-5824', 'simul. model. pract. theory': '1569-190X', 'single mol.': '1438-5171', 'softw. eng. notes': '0163-5948', 'softw. focus': '1529-7950', 'softw. process: improv. pract.': '1099-1670', 'softw. qual. j.': '0963-9314', 'softw. syst. model.': '1619-1374', 'softw. test. verif. reliab.': '1099-1689', 'softw.- pract. exp.': '1097-024X', 'sol. syst. res.': '0038-0946', 'solaris': '1265-4876', 'solid state sci.': '1293-2558', 'space sci. rev.': '0038-6308', 'sparc open access newsl.': '1546-7821', 'stat. comput.': '0960-3174', 'stat. methods appl.': '1613-981X', 'stat. sci.': '0883-4237', 'stnews': '1040-1229', 'strength mater.': '1573-9325', 'struct. des. tall build.': '1099-1794', 'stud. hist. philos. mod. phys.': '1355-2198', 'studia log.': '1572-8730', 'studia math.': '1730-6337', 'subsurf. sens. technol. appl.': '1573-9317', 'superlattices microstruct.': '0749-6036', 'surf. sci. lett.': '0167-2584', 'surf. technol.': '0376-4583', 'surv. 
high energy phys.': '0142-2413', 'synthese': '1573-0964', 'syst. comput. jpn.': '1520-684X', 'syst. eng.': '1520-6858', 'taiwan. j. math.': '1027-5487', 'telecommun. syst.': '1018-4864', 'theor. math. phys.': '1573-9333', 'theory comput.': '1557-2862', 'theory pract. object syst.': '1096-9942', 'trans. ire prof. group commun. syst.': '0277-6243', 'trans. ire prof. group compon. parts': '0096-2422', 'trans. ire prof. group ultrason. eng.': '0277-626X', 'trans. jpn. soc. artif. intell.': '1346-8030', 'trans. opt. soc.': '1475-4878', 'tribol. lett.': '1573-2711', 'tsinghua sci. technol.': '1007-0214', 'turk. j. math.': '1300-0098', 'turk. j. phys.': '1300-0101', 'ukr. math. j.': '0041-5995', 'ultrason. imaging': '0161-7346', 'univers. access inf. soc.': '1615-5297', 'upgrade': '1684-5285', 'user model. user adapt. interact.': '1573-1391', 'uspekhi fiz. nauk': '0042-1294', 'vak. forsch. prax.': '1522-2454', 'vine': '1474-1032', 'virtual real.': '1434-9957', 'web semant.': '1570-8268', 'weld. int.': '1573-9449', 'wind energy': '1099-1824', 'wirel. commun. mob. comput.': '1530-8677', 'wirel. netw.': '1022-0038', 'wirel. pers. commun.': '0929-6212', 'world pat. inf.': '0172-2190', 'world wide web': '1386-145X', 'z. anal. anwend.': '0232-2064', 'z. angew. math. mech.': '1521-4001', 'z. krist.gr.': '0044-2968', 'z. phys.': '0044-3328', 'z. phys., c': '0170-9739'} def format_element(bfo): """ Returns the ISSN of the record, if known.
Note that you HAVE to pre-generate the correspondences journal->ISSN if you want this element to return something (Run python bfe_issn.py -h to get help). """ journal_name = bfo.field(journal_name_tag) # Here you might want to process journal name # by doing the same operation that has been # done when saving the mappings journal_name = journal_name.lower().strip() if journal_name.endswith("[online]"): journal_name = journal_name[:-8].rstrip() return issns.get(journal_name, '') def build_issns_from_distant_site(url): """ Retrieves the ISSNs from a distant Invenio system. Store the "journal name -> issn" relation. Normalize journal names a little bit: - strip whitespace chars (left and right) - all lower case - remove "[Online]" suffix Print the result as Python dict structure. @param url: the url to load issn from (in the *exact* form: http://www.mysite.com/) """ ## Parse the results of the http request: ## http://cdsweb.cern.ch/search?cc=Periodicals&ot=022,210&of=tm&rg=9000 pattern_field = re.compile(r''' \D*(?P<docid>\d*) #document id \s(?P<tag>\d*)__\s\$\$a #tag (?P<value>.*?)$ #value ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) request = '/search?cc=Periodicals&ot=022,210&of=tm' try: - fields = urllib.urlopen(url.rstrip('/') + request).readlines() + fields = BIBFORMAT_OPENER.open(url.rstrip('/') + request).readlines() except IOError: sys.stderr.write("Error: Could not connect to %s.\n" % url) sys.exit(0) last_doc_id = None last_issn = None built_issns = {} #built_issns = issns # Uncomment this to extend existing issns dict # (e.g. in case of manual addition) for field in fields: result = pattern_field.search(field) if result: doc_id = result.group('docid') if doc_id != last_doc_id: # Reset saved ISSN if we parse new document last_issn = None tag = result.group('tag') if tag == '022': # Remember this ISSN last_issn = result.group('value') elif tag == '210' and last_issn is not None: # Found a journal name and issn exists. # Depending on how journal names are entered into the # database, you might want to do some processing # before saving: journal = result.group('value') journal = journal.lower().strip() if journal.endswith("[online]"): journal = journal[:-8].rstrip() built_issns[journal] = last_issn last_doc_id = doc_id prtyp = pprint.PrettyPrinter(indent=4) prtyp.pprint(built_issns) def build_issns_from_local_site(): """ Retrieves the ISSNs from the local database. Store the "journal name -> issn" relation. Normalize journal names a little bit: - strip whitespace chars (left and right) - all lower case - remove "[Online]" suffix Print the result as Python dict structure. """ rec_id_list = perform_request_search(cc='Periodicals', of='id') built_issns = {} #built_issns = issns # Uncomment this to extend existing issns dict # (e.g. in case of manual addition) for rec_id in rec_id_list: journal_name_list = get_fieldvalues(rec_id, '210__%') issn_list = get_fieldvalues(rec_id, '022__a') if issn_list: issn = issn_list[0] # There should be only one ISSN for journal_name in journal_name_list: # Depending on how journal names are entered into the database, # you might want to do some processing before saving: journal_name = journal_name.lower().strip() if journal_name.endswith("[online]"): journal_name = journal_name[:-8].rstrip() built_issns[journal_name] = issn prtyp = pprint.PrettyPrinter(indent=4) prtyp.pprint(built_issns) def print_info(): """ Info on element arguments """ print """ Collects ISSN and corresponding journal names from local repository and prints archive as dict structure.
Usage: python bfe_issn.py [Options] [url] Example: python bfe_issn.py http://cdsweb.cern.ch/ Options: -h, --help print this help -u, --url the URL to collect ISSN from -v, --version print version number If 'url' is not given, collect from local database, using a faster method. Returned structure can then be copied into bfe_issn.py 'format_element' function. """ if __name__ == '__main__': try: opts, args = getopt.getopt(sys.argv[1:], "hu:v", ["help", "url=", "version" ]) except getopt.error: print_info() sys.exit(0) url_arg = None for opt, opt_value in opts: if opt in ["-u", "--url"]: url_arg = opt_value elif opt in ["-v", "--version"]: print __revision__ sys.exit(0) else: print_info() sys.exit(0) if url_arg is not None: build_issns_from_distant_site(url_arg) else: build_issns_from_local_site() diff --git a/modules/bibrank/lib/bibrankgkb.py index ef16c6012..25ffe57a4 100644 --- a/modules/bibrank/lib/bibrankgkb.py +++ b/modules/bibrank/lib/bibrankgkb.py @@ -1,284 +1,286 @@ ## -*- mode: python; coding: utf-8; -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
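The change that runs through this patch replaces bare urllib.urlopen() calls with a per-module opener built by invenio.urlutils.make_invenio_opener(), as in bfe_issn.py above and bibrankgkb.py below. A minimal sketch of the pattern follows, assuming only the single agent-label argument that the patch itself shows; the 'MyModule' label and the fetch_lines() helper are hypothetical illustrations, not part of the patch:

    # Sketch of the per-module opener pattern introduced by this patch.
    # 'MyModule' is a hypothetical label; the real hunks use 'BibFormat'
    # and 'BibRank'. The opener is urllib-like: open() returns a
    # file-like object supporting read() and readlines().
    from invenio.urlutils import make_invenio_opener

    MYMODULE_OPENER = make_invenio_opener('MyModule')

    def fetch_lines(url):
        """Fetch a URL through the module opener, as the patched code does."""
        return MYMODULE_OPENER.open(url).readlines()

Routing every request through a named opener lets the installation identify which Invenio module issued an outgoing HTTP call, which a bare urllib.urlopen() cannot do.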
""" Usage: bibrankgkb %s [options] Examples: bibrankgkb --input=bibrankgkb.cfg --output=test.kb bibrankgkb -otest.kb -v9 bibrankgkb -v9 Generate options: -i, --input=file input file, default from /etc/bibrank/bibrankgkb.cfg -o, --output=file output file, will be placed in current folder General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ __revision__ = "$Id$" import getopt import sys import time -import urllib import re import ConfigParser +from invenio.urlutils import make_invenio_opener from invenio.config import CFG_ETCDIR from invenio.dbquery import run_sql +BIBRANK_OPENER = make_invenio_opener('BibRank') + opts_dict = {} task_id = -1 def bibrankgkb(config): """Generates a .kb file based on input from the configuration file""" if opts_dict["verbose"] >= 1: write_message("Running: Generate Knowledgebase.") journals = {} journal_src = {} i = 0 #Reading the configuration file while config.has_option("bibrankgkb","create_%s" % i): cfg = config.get("bibrankgkb", "create_%s" % i).split(",,") conv = {} temp = {} #Input source 1, either file, www or from db if cfg[0] == "file": conv = get_from_source(cfg[0], cfg[1]) del cfg[0:2] elif cfg[0] == "www": j = 0 urls = {} while config.has_option("bibrankgkb", cfg[1] % j): urls[j] = config.get("bibrankgkb", cfg[1] % j) j = j + 1 conv = get_from_source(cfg[0], (urls, cfg[2])) del cfg[0:3] elif cfg[0] == "db": conv = get_from_source(cfg[0], (cfg[1], cfg[2])) del cfg[0:3] if not conv: del cfg[0:2] else: if opts_dict["verbose"] >= 9: write_message("Using last resource for converting values.") #Input source 2, either file, www or from db if cfg[0] == "file": temp = get_from_source(cfg[0], cfg[1]) elif cfg[0] == "www": j = 0 urls = {} while config.has_option("bibrankgkb", cfg[1] % j): urls[j] = config.get("bibrankgkb", cfg[1] % j) j = j + 1 temp = get_from_source(cfg[0], (urls, cfg[2])) elif cfg[0] == "db": temp = get_from_source(cfg[0], (cfg[1], cfg[2])) i = i + 1 #If a conversion file is given, the names will be converted to the correct convention if len(conv) != 0: if opts_dict["verbose"] >= 9: write_message("Converting between naming conventions given.") temp = convert(conv, temp) if len(journals) != 0: for element in temp.keys(): if not journals.has_key(element): journals[element] = temp[element] else: journals = temp #Writing output file if opts_dict["output"]: f = open(opts_dict["output"], 'w') f.write("#Created by %s\n" % __revision__) f.write("#Sources:\n") for key in journals.keys(): f.write("%s---%s\n" % (key, journals[key])) f.close() if opts_dict["verbose"] >= 9: write_message("Output complete: %s" % opts_dict["output"]) write_message("Number of hits: %s" % len(journals)) if opts_dict["verbose"] >= 9: write_message("Result:") for key in journals.keys(): write_message("%s---%s" % (key, journals[key])) write_message("Total nr of lines: %s" % len(journals)) def showtime(timeused): if opts_dict["verbose"] >= 9: write_message("Time used: %d second(s)." 
% timeused) def get_from_source(type, data): """Read a source based on the input to the function""" datastruct = {} if type == "db": jvalue = run_sql(data[0]) jname = dict(run_sql(data[1])) if opts_dict["verbose"] >= 9: write_message("Reading data from database using SQL statements:") write_message(jvalue) write_message(jname) for key, value in jvalue: if jname.has_key(key): key2 = jname[key].strip() datastruct[key2] = value #print "%s---%s" % (key2, value) elif type == "file": input = open(data, 'r') if opts_dict["verbose"] >= 9: write_message("Reading data from file: %s" % data) data = input.readlines() datastruct = {} for line in data: #print line if not line[0:1] == "#": key = line.strip().split("---")[0].split() value = line.strip().split("---")[1] datastruct[key] = value #print "%s---%s" % (key,value) elif type == "www": if opts_dict["verbose"] >= 9: write_message("Reading data from www using regexp: %s" % data[1]) write_message("Reading data from url:") for link in data[0].keys(): if opts_dict["verbose"] >= 9: write_message(data[0][link]) - page = urllib.urlopen(data[0][link]) + page = BIBRANK_OPENER.open(data[0][link]) input = page.read() #Using the regexp from config file reg = re.compile(data[1]) iterator = re.finditer(reg, input) for match in iterator: if match.group("value"): key = match.group("key").strip() value = match.group("value").replace(",", ".") datastruct[key] = value if opts_dict["verbose"] == 9: print "%s---%s" % (key, value) return datastruct def convert(convstruct, journals): """Converting between names""" if len(convstruct) > 0 and len(journals) > 0: invconvstruct = dict(map(lambda x: (x[1], x[0]), convstruct.items())) tempjour = {} for name in journals.keys(): if convstruct.has_key(name): tempjour[convstruct[name]] = journals[name] elif invconvstruct.has_key(name): tempjour[name] = journals[name] return tempjour else: return journals def write_message(msg, stream = sys.stdout): """Write message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) try: stream.write("%s\n" % msg) except UnicodeEncodeError: stream.write("%s\n" % msg.encode('ascii', 'backslashreplace')) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) return def usage(code, msg=''): "Prints usage for this module." 
if msg: sys.stderr.write("Error: %s.\n" % msg) print >> sys.stderr, \ """ Usage: %s [options] Examples: %s --input=bibrankgkb.cfg --output=test.kb %s -otest.kb -v9 %s -v9 Generate options: -i, --input=file input file, default from /etc/bibrank/bibrankgkb.cfg -o, --output=file output file, will be placed in current folder General options: -h, --help print this help and exit -V, --version print version and exit -v, --verbose=LEVEL verbose level (from 0 to 9, default 1) """ % ((sys.argv[0],) * 4) sys.exit(code) def command_line(): global opts_dict long_flags = ["input=", "output=", "help", "version", "verbose="] short_flags = "i:o:hVv:" format_string = "%Y-%m-%d %H:%M:%S" sleeptime = "" try: opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags) except getopt.GetoptError, err: write_message(err, sys.stderr) usage(1) if args: usage(1) opts_dict = {"input": "%s/bibrank/bibrankgkb.cfg" % CFG_ETCDIR, "output":"", "verbose":1} sched_time = time.strftime(format_string) user = "" try: for opt in opts: if opt == ("-h","") or opt == ("--help",""): usage(1) elif opt == ("-V","") or opt == ("--version",""): print __revision__ sys.exit(1) elif opt[0] in ["--input", "-i"]: opts_dict["input"] = opt[1] elif opt[0] in ["--output", "-o"]: opts_dict["output"] = opt[1] elif opt[0] in ["--verbose", "-v"]: opts_dict["verbose"] = int(opt[1]) else: usage(1) startCreate = time.time() file = opts_dict["input"] config = ConfigParser.ConfigParser() config.readfp(open(file)) bibrankgkb(config) if opts_dict["verbose"] >= 9: showtime((time.time() - startCreate)) except StandardError, e: write_message(e, sys.stderr) sys.exit(1) return def main(): command_line() if __name__ == "__main__": main() diff --git a/modules/bibsched/lib/bibsched.py b/modules/bibsched/lib/bibsched.py index ab0912210..b36f3b7cd 100644 --- a/modules/bibsched/lib/bibsched.py +++ b/modules/bibsched/lib/bibsched.py @@ -1,1660 +1,1716 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
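Besides the scheduler and monitor code, this bibsched hunk adds two small task-queue helpers, get_last_taskid() and delete_task(), both thin wrappers over the schTASK table. A sketch of how a caller might combine them; the drop_latest_task() wrapper and its import path are illustrative assumptions, not part of the patch:

    # Illustrative only: delete the most recently created bibsched task.
    # get_last_taskid() and delete_task() are the helpers added below.
    from invenio.bibsched import get_last_taskid, delete_task

    def drop_latest_task():
        task_id = get_last_taskid()  # SELECT MAX(id) FROM schTASK
        if task_id is not None:      # None when the queue is empty
            delete_task(task_id)     # DELETE FROM schTASK WHERE id=%s
        return task_id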
"""BibSched - task management, scheduling and executing system for Invenio """ __revision__ = "$Id$" import os import sys import time import re import marshal import getopt +from itertools import chain from socket import gethostname from subprocess import Popen import signal from invenio.bibtask_config import \ CFG_BIBTASK_VALID_TASKS, \ CFG_BIBTASK_MONOTASKS, \ CFG_BIBTASK_FIXEDTIMETASKS from invenio.config import \ CFG_PREFIX, \ CFG_BIBSCHED_REFRESHTIME, \ CFG_BIBSCHED_LOG_PAGER, \ CFG_BIBSCHED_EDITOR, \ CFG_BINDIR, \ CFG_LOGDIR, \ CFG_BIBSCHED_GC_TASKS_OLDER_THAN, \ CFG_BIBSCHED_GC_TASKS_TO_REMOVE, \ CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE, \ CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, \ CFG_SITE_URL, \ CFG_BIBSCHED_NODE_TASKS, \ CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY from invenio.dbquery import run_sql, real_escape_string from invenio.textutils import wrap_text_in_a_box from invenio.errorlib import register_exception, register_emergency from invenio.shellutils import run_shell_command CFG_VALID_STATUS = ('WAITING', 'SCHEDULED', 'RUNNING', 'CONTINUING', '% DELETED', 'ABOUT TO STOP', 'ABOUT TO SLEEP', 'STOPPED', 'SLEEPING', 'KILLED', 'NOW STOP', 'ERRORS REPORTED') SHIFT_RE = re.compile("([-\+]{0,1})([\d]+)([dhms])") class RecoverableError(StandardError): pass def get_pager(): """ Return the first available pager. """ paths = ( os.environ.get('PAGER', ''), CFG_BIBSCHED_LOG_PAGER, '/usr/bin/less', '/bin/more' ) for pager in paths: if os.path.exists(pager): return pager def get_editor(): """ Return the first available editor. """ paths = ( os.environ.get('EDITOR', ''), CFG_BIBSCHED_EDITOR, '/usr/bin/vim', '/usr/bin/emacs', '/usr/bin/vi', '/usr/bin/nano', ) for editor in paths: if os.path.exists(editor): return editor def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. 
It can handle normal date strings and shifts with respect to now.""" try: date = time.time() factors = {"d": 24*3600, "h": 3600, "m": 60, "s": 1} m = SHIFT_RE.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date except: return None def get_my_pid(process, args=''): if sys.platform.startswith('freebsd'): command = "ps -o pid,args | grep '%s %s' | grep -v 'grep' | sed -n 1p" % (process, args) else: command = "ps -C %s o '%%p%%a' | grep '%s %s' | grep -v 'grep' | sed -n 1p" % (process, process, args) answer = run_shell_command(command)[1].strip() if answer == '': answer = 0 else: answer = answer[:answer.find(' ')] return int(answer) def get_task_pid(task_name, task_id, ignore_error=False): """Return the pid of task_name/task_id""" try: path = os.path.join(CFG_PREFIX, 'var', 'run', 'bibsched_task_%d.pid' % task_id) pid = int(open(path).read()) os.kill(pid, signal.SIGUSR2) return pid except (OSError, IOError): if ignore_error: return 0 register_exception() return get_my_pid(task_name, str(task_id)) +def get_last_taskid(): + """Return the last taskid used.""" + return run_sql("SELECT MAX(id) FROM schTASK")[0][0] + +def delete_task(task_id): + """Delete the corresponding task.""" + run_sql("DELETE FROM schTASK WHERE id=%s", (task_id, )) def is_task_scheduled(task_name): """Check if a certain task_name is due for execution (WAITING or RUNNING)""" sql = """SELECT COUNT(proc) FROM schTASK WHERE proc = %s AND (status='WAITING' OR status='RUNNING')""" return run_sql(sql, (task_name,))[0][0] > 0 def get_task_ids_by_descending_date(task_name, statuses=['SCHEDULED']): """Returns list of task ids, ordered by descending runtime.""" sql = """SELECT id FROM schTASK WHERE proc=%s AND (%s) ORDER BY runtime DESC""" \ % " OR ".join(["status = '%s'" % x for x in statuses]) return [x[0] for x in run_sql(sql, (task_name,))] def get_task_options(task_id): """Returns options for task_id read from the BibSched task queue table.""" res = run_sql("SELECT arguments FROM schTASK WHERE id=%s", (task_id,)) try: return marshal.loads(res[0][0]) except IndexError: return list() def gc_tasks(verbose=False, statuses=None, since=None, tasks=None): """Garbage collect the task queue.""" if tasks is None: tasks = CFG_BIBSCHED_GC_TASKS_TO_REMOVE + CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE if since is None: since = '-%id' % CFG_BIBSCHED_GC_TASKS_OLDER_THAN if statuses is None: statuses = ['DONE'] statuses = [status.upper() for status in statuses if status.upper() != 'RUNNING'] date = get_datetime(since) status_query = 'status in (%s)' % ','.join([repr(real_escape_string(status)) for status in statuses]) for task in tasks: if task in CFG_BIBSCHED_GC_TASKS_TO_REMOVE: res = run_sql("""DELETE FROM schTASK WHERE proc=%%s AND %s AND runtime<%%s""" % status_query, (task, date)) write_message('Deleted %s %s tasks (created before %s) with %s' \ % (res, task, date, status_query)) elif task in CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE: run_sql("""INSERT INTO hstTASK(id,proc,host,user, runtime,sleeptime,arguments,status,progress) SELECT id,proc,host,user, runtime,sleeptime,arguments,status,progress FROM schTASK WHERE proc=%%s AND %s AND runtime<%%s""" % status_query, (task, date)) res = run_sql("""DELETE FROM schTASK WHERE proc=%%s AND %s AND runtime<%%s""" % status_query, (task, date)) 
write_message('Archived %s %s tasks (created before %s) with %s' \ % (res, task, date, status_query)) - def spawn_task(command, wait=False): """ Spawn the provided command in a way that is detached from the current group. In this way a signal received by bibsched is not going to be automatically propagated to the spawned process. """ def preexec(): # Don't forward signals. os.setsid() process = Popen(command, preexec_fn=preexec, shell=True) if wait: process.wait() def bibsched_get_host(task_id): """Retrieve the hostname of the task""" res = run_sql("SELECT host FROM schTASK WHERE id=%s LIMIT 1", (task_id, ), 1) if res: return res[0][0] def bibsched_set_host(task_id, host=""): """Update the progress of task_id.""" return run_sql("UPDATE schTASK SET host=%s WHERE id=%s", (host, task_id)) def bibsched_get_status(task_id): """Retrieve the task status.""" res = run_sql("SELECT status FROM schTASK WHERE id=%s LIMIT 1", (task_id, ), 1) if res: return res[0][0] def bibsched_set_status(task_id, status, when_status_is=None): """Update the status of task_id.""" if when_status_is is None: return run_sql("UPDATE schTASK SET status=%s WHERE id=%s", (status, task_id)) else: return run_sql("UPDATE schTASK SET status=%s WHERE id=%s AND status=%s", (status, task_id, when_status_is)) def bibsched_set_progress(task_id, progress): """Update the progress of task_id.""" return run_sql("UPDATE schTASK SET progress=%s WHERE id=%s", (progress, task_id)) def bibsched_set_priority(task_id, priority): """Update the priority of task_id.""" return run_sql("UPDATE schTASK SET priority=%s WHERE id=%s", (priority, task_id)) def bibsched_send_signal(proc, task_id, sig): """Send a signal to a given task.""" if bibsched_get_host(task_id) != gethostname(): return False pid = get_task_pid(proc, task_id, True) if pid: try: os.kill(pid, sig) return True except OSError: return False return False class Manager(object): def __init__(self, old_stdout): import curses import curses.panel from curses.wrapper import wrapper self.old_stdout = old_stdout self.curses = curses self.helper_modules = CFG_BIBTASK_VALID_TASKS self.running = 1 self.footer_auto_mode = "Automatic Mode [A Manual] [1/2/3 Display] [P Purge] [l/L Log] [O Opts] [E Edit motd] [Q Quit]" self.footer_select_mode = "Manual Mode [A Automatic] [1/2/3 Display Type] [P Purge] [l/L Log] [O Opts] [E Edit motd] [Q Quit]" self.footer_waiting_item = "[R Run] [D Delete] [N Priority]" self.footer_running_item = "[S Sleep] [T Stop] [K Kill]" self.footer_stopped_item = "[I Initialise] [D Delete] [K Acknowledge]" self.footer_sleeping_item = "[W Wake Up] [T Stop] [K Kill]" self.item_status = "" self.rows = [] self.panel = None self.display = 2 self.first_visible_line = 0 self.auto_mode = 0 self.currentrow = None self.current_attr = 0 self.header_lines = 2 self.hostname = gethostname() self.allowed_task_types = CFG_BIBSCHED_NODE_TASKS.get(self.hostname, CFG_BIBTASK_VALID_TASKS) try: motd_path = os.path.join(CFG_PREFIX, "var", "run", "bibsched.motd") self.motd = open(motd_path).read().strip() if len(self.motd) > 0: self.motd = "MOTD [%s] " % time.strftime("%Y-%m-%d %H:%M", time.localtime(os.path.getmtime(motd_path))) + self.motd self.header_lines = 3 except IOError: self.motd = "" self.selected_line = self.header_lines wrapper(self.start) def handle_keys(self, char): if char == -1: return if self.auto_mode and (char not in (self.curses.KEY_UP, self.curses.KEY_DOWN, self.curses.KEY_PPAGE, self.curses.KEY_NPAGE, ord("g"), ord("G"), ord("q"), ord("Q"), ord("a"), ord("A"), ord("1"), ord("2"), 
ord("3"), ord("p"), ord("P"), ord("o"), ord("O"), ord("l"), ord("L"), ord("e"), ord("E"))): self.display_in_footer("in automatic mode") self.stdscr.refresh() else: status = self.currentrow and self.currentrow[5] or None if char == self.curses.KEY_UP: self.selected_line = max(self.selected_line - 1, self.header_lines) self.repaint() if char == self.curses.KEY_PPAGE: self.selected_line = max(self.selected_line - 10, self.header_lines) self.repaint() elif char == self.curses.KEY_DOWN: self.selected_line = min(self.selected_line + 1, len(self.rows) + self.header_lines - 1) self.repaint() elif char == self.curses.KEY_NPAGE: self.selected_line = min(self.selected_line + 10, len(self.rows) + self.header_lines - 1) self.repaint() elif char == self.curses.KEY_HOME: self.first_visible_line = 0 self.selected_line = self.header_lines elif char == ord("g"): self.selected_line = self.header_lines self.repaint() elif char == ord("G"): self.selected_line = len(self.rows) + self.header_lines - 1 self.repaint() elif char in (ord("a"), ord("A")): self.change_auto_mode() elif char == ord("l"): self.openlog() elif char == ord("L"): self.openlog(err=True) elif char in (ord("w"), ord("W")): self.wakeup() elif char in (ord("n"), ord("N")): self.change_priority() elif char in (ord("r"), ord("R")): if status in ('WAITING', 'SCHEDULED'): self.run() elif char in (ord("s"), ord("S")): self.sleep() elif char in (ord("k"), ord("K")): if status in ('ERROR', 'DONE WITH ERRORS', 'ERRORS REPORTED'): self.acknowledge() elif status is not None: self.kill() elif char in (ord("t"), ord("T")): self.stop() elif char in (ord("d"), ord("D")): self.delete() elif char in (ord("i"), ord("I")): self.init() elif char in (ord("p"), ord("P")): self.purge_done() elif char in (ord("o"), ord("O")): self.display_task_options() elif char in (ord("e"), ord("E")): self.edit_motd() elif char == ord("1"): self.display = 1 self.first_visible_line = 0 self.selected_line = self.header_lines self.display_in_footer("only done processes are displayed") elif char == ord("2"): self.display = 2 self.first_visible_line = 0 self.selected_line = self.header_lines self.display_in_footer("only not done processes are displayed") elif char == ord("3"): self.display = 3 self.first_visible_line = 0 self.selected_line = self.header_lines self.display_in_footer("only archived processes are displayed") elif char in (ord("q"), ord("Q")): if self.curses.panel.top_panel() == self.panel: self.panel = None self.curses.panel.update_panels() else: self.running = 0 return def openlog(self, err=False): task_id = self.currentrow[0] if err: logname = os.path.join(CFG_LOGDIR, 'bibsched_task_%d.err' % task_id) else: logname = os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % task_id) if os.path.exists(logname): pager = get_pager() if os.path.exists(pager): self.curses.endwin() os.system('%s %s' % (pager, logname)) print >> self.old_stdout, "\rPress ENTER to continue", self.old_stdout.flush() raw_input() self.curses.panel.update_panels() else: self._display_message_box("No pager was found") def edit_motd(self): """Add, delete or change the motd message that will be shown when the bibsched monitor starts.""" editor = get_editor() if editor: motdpath = os.path.join(CFG_PREFIX, "var", "run", "bibsched.motd") previous = self.motd self.curses.endwin() os.system("%s %s" % (editor, motdpath)) self.curses.panel.update_panels() try: self.motd = open(motdpath).read().strip() except IOError: self.motd = "" if len(self.motd) > 0: self.motd = "MOTD [%s] " % time.strftime("%m-%d-%Y %H:%M", 
time.localtime(os.path.getmtime(motdpath))) + self.motd
            if previous[24:] != self.motd[24:]:
                if len(previous) == 0:
                    Log('motd set to "%s"' % self.motd.replace("\n", "|"))
                    self.selected_line += 1
                    self.header_lines += 1
                elif len(self.motd) == 0:
                    Log('motd deleted')
                    self.selected_line -= 1
                    self.header_lines -= 1
                else:
                    Log('motd changed to "%s"' % self.motd.replace("\n", "|"))
        else:
            self._display_message_box("No editor was found")

    def display_task_options(self):
        """Nicely display information about current process."""
        msg = ' id: %i\n\n' % self.currentrow[0]
        pid = get_task_pid(self.currentrow[1], self.currentrow[0], True)
        if pid is not None:
            msg += ' pid: %s\n\n' % pid
        msg += ' priority: %s\n\n' % self.currentrow[8]
        msg += ' proc: %s\n\n' % self.currentrow[1]
        msg += ' user: %s\n\n' % self.currentrow[2]
        msg += ' runtime: %s\n\n' % self.currentrow[3].strftime("%Y-%m-%d %H:%M:%S")
        msg += ' sleeptime: %s\n\n' % self.currentrow[4]
        msg += ' status: %s\n\n' % self.currentrow[5]
        msg += ' progress: %s\n\n' % self.currentrow[6]
        arguments = marshal.loads(self.currentrow[7])
        if type(arguments) is dict:
            # FIXME: REMOVE AFTER MAJOR RELEASE 1.0
            msg += ' options : %s\n\n' % arguments
        else:
            msg += 'executable : %s\n\n' % arguments[0]
            msg += ' arguments : %s\n\n' % ' '.join(arguments[1:])
        msg += '\n\nPress q to quit this panel...'
        msg = wrap_text_in_a_box(msg, style='no_border')
        rows = msg.split('\n')
        height = len(rows) + 2
        width = max([len(row) for row in rows]) + 4
        try:
            self.win = self.curses.newwin(
                height, width,
                (self.height - height) / 2 + 1,
                (self.width - width) / 2 + 1
            )
        except self.curses.error:
            return
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        while self.win.getkey() != 'q':
            pass
        self.panel = None

    def count_processes(self, status):
        out = 0
        res = run_sql("""SELECT COUNT(id) FROM schTASK
                         WHERE status=%s GROUP BY status""", (status,))
        try:
            out = res[0][0]
        except:
            pass
        return out

    def change_priority(self):
        task_id = self.currentrow[0]
        priority = self.currentrow[8]
        new_priority = self._display_ask_number_box("Insert the desired \
priority for task %s. The smaller the number, the lower the priority. Note that \
a number less than -10 means the task will always be postponed, while a number \
greater than 10 means some lower-priority tasks could be stopped in \
order to let this task run. The current priority is %s. New value:" \
                % (task_id, priority))
        try:
            new_priority = int(new_priority)
        except ValueError:
            return
        bibsched_set_priority(task_id, new_priority)

    def wakeup(self):
        task_id = self.currentrow[0]
        process = self.currentrow[1]
        status = self.currentrow[5]
        #if self.count_processes('RUNNING') + self.count_processes('CONTINUING') >= 1:
            #self.display_in_footer("a process is already running!")
        if status == "SLEEPING":
            if not bibsched_send_signal(process, task_id, signal.SIGCONT):
                bibsched_set_status(task_id, "ERROR", "SLEEPING")
            self.display_in_footer("process woken up")
        else:
            self.display_in_footer("process is not sleeping")
        self.stdscr.refresh()

    def _display_YN_box(self, msg):
        """Utility to display confirmation boxes."""
        msg += ' (Y/N)'
        msg = wrap_text_in_a_box(msg, style='no_border')
        rows = msg.split('\n')
        height = len(rows) + 2
        width = max([len(row) for row in rows]) + 4
        self.win = self.curses.newwin(
            height, width,
            (self.height - height) / 2 + 1,
            (self.width - width) / 2 + 1
        )
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        try:
            while 1:
                c = self.win.getch()
                if c in (ord('y'), ord('Y')):
                    return True
                elif c in (ord('n'), ord('N')):
                    return False
        finally:
            self.panel = None

    def _display_ask_number_box(self, msg):
        """Utility to ask the user for a number."""
        msg = wrap_text_in_a_box(msg, style='no_border')
        rows = msg.split('\n')
        height = len(rows) + 3
        width = max([len(row) for row in rows]) + 4
        self.win = self.curses.newwin(
            height, width,
            (self.height - height) / 2 + 1,
            (self.width - width) / 2 + 1
        )
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        self.win.move(height - 2, 2)
        self.curses.echo()
        ret = self.win.getstr()
        self.curses.noecho()
        return ret

    def _display_message_box(self, msg):
        """Utility to display message boxes."""
        rows = msg.split('\n')
        height = len(rows) + 2
        width = max([len(row) for row in rows]) + 3
        self.win = self.curses.newwin(
            height, width,
            (self.height - height) / 2 + 1,
            (self.width - width) / 2 + 1
        )
        self.panel = self.curses.panel.new_panel(self.win)
        self.panel.top()
        self.win.border()
        i = 1
        for row in rows:
            self.win.addstr(i, 2, row, self.current_attr)
            i += 1
        self.win.refresh()
        self.win.move(height - 2, 2)
        self.win.getkey()
        self.curses.noecho()

    def purge_done(self):
        """Garbage collector."""
        if self._display_YN_box(
            "You are going to purge the list of DONE tasks.\n\n"
            "%s tasks, submitted more than %s days ago, will be archived.\n\n"
            "%s tasks, submitted more than %s days ago, will be deleted.\n\n"
            "Are you sure?"
% ( ', '.join(CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE), CFG_BIBSCHED_GC_TASKS_OLDER_THAN, ', '.join(CFG_BIBSCHED_GC_TASKS_TO_REMOVE), CFG_BIBSCHED_GC_TASKS_OLDER_THAN)): gc_tasks() self.display_in_footer("DONE processes purged") def run(self): task_id = self.currentrow[0] process = self.currentrow[1].split(':')[0] status = self.currentrow[5] #if self.count_processes('RUNNING') + self.count_processes('CONTINUING') >= 1: #self.display_in_footer("a process is already running!") if status == "WAITING": if process in self.helper_modules: if run_sql("""UPDATE schTASK SET status='SCHEDULED', host=%s WHERE id=%s and status='WAITING'""", (self.hostname, task_id)): program = os.path.join(CFG_BINDIR, process) command = "%s %s > /dev/null 2> /dev/null" % (program, str(task_id)) spawn_task(command) Log("manually running task #%d (%s)" % (task_id, process)) else: ## Process already running (typing too quickly on the keyboard?) pass else: self.display_in_footer("Process %s is not in the list of allowed processes." % process) else: self.display_in_footer("Process status should be SCHEDULED or WAITING!") def acknowledge(self): task_id = self.currentrow[0] status = self.currentrow[5] if status in ('ERROR', 'DONE WITH ERRORS', 'ERRORS REPORTED'): bibsched_set_status(task_id, 'ACK ' + status, status) self.display_in_footer("Acknowledged error") def sleep(self): task_id = self.currentrow[0] status = self.currentrow[5] if status in ('RUNNING', 'CONTINUING'): bibsched_set_status(task_id, 'ABOUT TO SLEEP', status) self.display_in_footer("SLEEP signal sent to task #%s" % task_id) else: self.display_in_footer("Cannot put to sleep non-running processes") def kill(self): task_id = self.currentrow[0] process = self.currentrow[1] status = self.currentrow[5] if status in ('RUNNING', 'CONTINUING', 'ABOUT TO STOP', 'ABOUT TO SLEEP', 'SLEEPING'): if self._display_YN_box("Are you sure you want to kill the %s process %s?" 
% (process, task_id)):
                bibsched_send_signal(process, task_id, signal.SIGKILL)
                bibsched_set_status(task_id, 'KILLED')
                self.display_in_footer("KILL signal sent to task #%s" % task_id)
        else:
            self.display_in_footer("Cannot kill non-running processes")

    def stop(self):
        task_id = self.currentrow[0]
        process = self.currentrow[1]
        status = self.currentrow[5]
        if status in ('RUNNING', 'CONTINUING', 'ABOUT TO SLEEP', 'SLEEPING'):
            if status == 'SLEEPING':
                bibsched_set_status(task_id, 'NOW STOP', 'SLEEPING')
                bibsched_send_signal(process, task_id, signal.SIGCONT)
                count = 10
                while bibsched_get_status(task_id) == 'NOW STOP':
                    if count <= 0:
                        bibsched_set_status(task_id, 'ERROR', 'NOW STOP')
                        self.display_in_footer("It seems impossible to wake up this task.")
                        return
                    time.sleep(CFG_BIBSCHED_REFRESHTIME)
                    count -= 1
            else:
                bibsched_set_status(task_id, 'ABOUT TO STOP', status)
            self.display_in_footer("STOP signal sent to task #%s" % task_id)
        else:
            self.display_in_footer("Cannot stop non-running processes")

    def delete(self):
        task_id = self.currentrow[0]
        status = self.currentrow[5]
        if status not in ('RUNNING', 'CONTINUING', 'SLEEPING', 'SCHEDULED', 'ABOUT TO STOP', 'ABOUT TO SLEEP'):
            bibsched_set_status(task_id, "%s_DELETED" % status, status)
            self.display_in_footer("process deleted")
            self.update_rows()
            self.repaint()
        else:
            self.display_in_footer("Cannot delete running processes")

    def init(self):
        task_id = self.currentrow[0]
        status = self.currentrow[5]
        if status not in ('RUNNING', 'CONTINUING', 'SLEEPING'):
            bibsched_set_status(task_id, "WAITING")
            bibsched_set_progress(task_id, "")
            bibsched_set_host(task_id, "")
            self.display_in_footer("process initialised")
        else:
            self.display_in_footer("Cannot initialise running processes")

    def change_auto_mode(self):
        if self.auto_mode:
            program = os.path.join(CFG_BINDIR, "bibsched")
            COMMAND = "%s -q halt" % program
            os.system(COMMAND)
            self.auto_mode = 0
        else:
            program = os.path.join(CFG_BINDIR, "bibsched")
            COMMAND = "%s -q start" % program
            os.system(COMMAND)
            self.auto_mode = 1
        self.stdscr.refresh()

    def put_line(self, row, header=False, motd=False):
        ## ROW: (id,proc,user,runtime,sleeptime,status,progress,arguments,priority,host)
        ##       0  1    2    3       4         5      6        7         8        9
        col_w = [7, 25, 15, 21, 7, 11, 21, 60]
        maxx = self.width
        if self.y == self.selected_line - self.first_visible_line and self.y > 1:
            self.item_status = row[5]
            self.currentrow = row
        if motd:
            attr = self.curses.color_pair(1) + self.curses.A_BOLD
        elif self.y == self.header_lines - 2:
            if self.auto_mode:
                attr = self.curses.color_pair(2) + self.curses.A_STANDOUT + self.curses.A_BOLD
            else:
                attr = self.curses.color_pair(8) + self.curses.A_STANDOUT + self.curses.A_BOLD
        elif row[5] == "DONE":
            attr = self.curses.color_pair(5) + self.curses.A_BOLD
        elif row[5] == "STOPPED":
            attr = self.curses.color_pair(6) + self.curses.A_BOLD
        elif row[5].find("ERROR") > -1:
            attr = self.curses.color_pair(4) + self.curses.A_BOLD
        elif row[5] == "WAITING":
            attr = self.curses.color_pair(3) + self.curses.A_BOLD
        elif row[5] in ("RUNNING", "CONTINUING"):
            attr = self.curses.color_pair(2) + self.curses.A_BOLD
        elif not header and row[8]:
            attr = self.curses.A_BOLD
        else:
            attr = self.curses.A_NORMAL
        ## If the task is not relevant for this instance of BibSched because
        ## the type of the task cannot be run, or it is running on another
        ## machine: make it a different color
        if not header and (row[1].split(':')[0] not in self.allowed_task_types or
                           (row[9] != '' and row[9] != self.hostname)):
            attr = self.curses.color_pair(6)
            if not row[6]:
                nrow = list(row)
                nrow[6] = 'Not allowed on this
instance' row = tuple(nrow) if self.y == self.selected_line - self.first_visible_line and self.y > 1: self.current_attr = attr attr += self.curses.A_REVERSE if header: # Dirty hack. put_line should be better refactored. # row contains one less element: arguments ## !!! FIXME: THIS IS CRAP myline = str(row[0]).ljust(col_w[0]-1) myline += str(row[1]).ljust(col_w[1]-1) myline += str(row[2]).ljust(col_w[2]-1) myline += str(row[3]).ljust(col_w[3]-1) myline += str(row[4]).ljust(col_w[4]-1) myline += str(row[5]).ljust(col_w[5]-1) myline += str(row[6]).ljust(col_w[6]-1) myline += str(row[7]).ljust(col_w[7]-1) elif motd: myline = str(row[0]) else: ## ROW: (id,proc,user,runtime,sleeptime,status,progress,arguments,priority,host) ## 0 1 2 3 4 5 6 7 8 9 priority = str(row[8] and ' [%s]' % row[8] or '') myline = str(row[0]).ljust(col_w[0])[:col_w[0]-1] myline += (str(row[1])[:col_w[1]-len(priority)-2] + priority).ljust(col_w[1]-1) myline += str(row[2]).ljust(col_w[2])[:col_w[2]-1] myline += str(row[3]).ljust(col_w[3])[:col_w[3]-1] myline += str(row[4]).ljust(col_w[4])[:col_w[4]-1] myline += str(row[5]).ljust(col_w[5])[:col_w[5]-1] myline += str(row[9]).ljust(col_w[6])[:col_w[6]-1] myline += str(row[6]).ljust(col_w[7])[:col_w[7]-1] myline = myline.ljust(maxx) try: self.stdscr.addnstr(self.y, 0, myline, maxx, attr) except self.curses.error: pass self.y += 1 def display_in_footer(self, footer, i=0, print_time_p=0): if print_time_p: footer = "%s %s" % (footer, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) maxx = self.stdscr.getmaxyx()[1] footer = footer.ljust(maxx) if self.auto_mode: colorpair = 2 else: colorpair = 1 try: self.stdscr.addnstr(self.y - i, 0, footer, maxx - 1, self.curses.A_STANDOUT + self.curses.color_pair(colorpair) + self.curses.A_BOLD) except self.curses.error: pass def repaint(self): if server_pid(): self.auto_mode = 1 else: if self.auto_mode == 1: self.curses.beep() self.auto_mode = 0 self.y = 0 self.stdscr.erase() self.height, self.width = self.stdscr.getmaxyx() maxy = self.height - 2 #maxx = self.width if len(self.motd) > 0: self.put_line((self.motd.strip().replace("\n", " - ")[:79], "", "", "", "", "", "", "", ""), header=False, motd=True) self.put_line(("ID", "PROC [PRI]", "USER", "RUNTIME", "SLEEP", "STATUS", "HOST", "PROGRESS"), header=True) self.put_line(("", "", "", "", "", "", "", ""), header=True) if self.selected_line > maxy + self.first_visible_line - 1: self.first_visible_line = self.selected_line - maxy + 1 if self.selected_line < self.first_visible_line + 2: self.first_visible_line = self.selected_line - 2 for row in self.rows[self.first_visible_line:self.first_visible_line+maxy-2]: self.put_line(row) self.y = self.stdscr.getmaxyx()[0] - 1 if self.auto_mode: self.display_in_footer(self.footer_auto_mode, print_time_p=1) else: self.display_in_footer(self.footer_select_mode, print_time_p=1) footer2 = "" if self.item_status.find("DONE") > -1 or self.item_status in ("ERROR", "STOPPED", "KILLED", "ERRORS REPORTED"): footer2 += self.footer_stopped_item elif self.item_status in ("RUNNING", "CONTINUING", "ABOUT TO STOP", "ABOUT TO SLEEP"): footer2 += self.footer_running_item elif self.item_status == "SLEEPING": footer2 += self.footer_sleeping_item elif self.item_status == "WAITING": footer2 += self.footer_waiting_item self.display_in_footer(footer2, 1) self.stdscr.refresh() def update_rows(self): if self.display == 1: table = "schTASK" where = "and (status='DONE' or status LIKE 'ACK%')" order = "runtime DESC" limit = "" elif self.display == 2: table = "schTASK" where = "and 
(status<>'DONE' and status NOT LIKE 'ACK%')" order = "runtime ASC" limit = "limit %s" % CFG_BIBSCHED_MAX_ARCHIVED_ROWS_DISPLAY else: table = "hstTASK" order = "runtime DESC" where = "" limit = "" self.rows = run_sql("""SELECT id, proc, user, runtime, sleeptime, status, progress, arguments, priority, host, sequenceid FROM %s WHERE status NOT LIKE '%%_DELETED' %s ORDER BY %s %s""" % (table, where, order, limit)) # Make sure we are not selecting a line that disappeared self.selected_line = min(self.selected_line, len(self.rows) + self.header_lines - 1) - def start(self, stdscr): os.environ['BIBSCHED_MODE'] = 'manual' if self.curses.has_colors(): self.curses.start_color() self.curses.init_pair(8, self.curses.COLOR_WHITE, self.curses.COLOR_BLACK) self.curses.init_pair(1, self.curses.COLOR_WHITE, self.curses.COLOR_RED) self.curses.init_pair(2, self.curses.COLOR_GREEN, self.curses.COLOR_BLACK) self.curses.init_pair(3, self.curses.COLOR_MAGENTA, self.curses.COLOR_BLACK) self.curses.init_pair(4, self.curses.COLOR_RED, self.curses.COLOR_BLACK) self.curses.init_pair(5, self.curses.COLOR_BLUE, self.curses.COLOR_BLACK) self.curses.init_pair(6, self.curses.COLOR_CYAN, self.curses.COLOR_BLACK) self.curses.init_pair(7, self.curses.COLOR_YELLOW, self.curses.COLOR_BLACK) self.stdscr = stdscr self.base_panel = self.curses.panel.new_panel(self.stdscr) self.base_panel.bottom() self.curses.panel.update_panels() self.height, self.width = stdscr.getmaxyx() self.stdscr.erase() if server_pid(): self.auto_mode = 1 ring = 4 if len(self.motd) > 0: self._display_message_box(self.motd + "\nPress any key to close") while self.running: if ring == 4: self.update_rows() ring = 0 self.repaint() ring += 1 char = -1 try: char = timed_out(self.stdscr.getch, 1) if char == 27: # escaping sequence char = self.stdscr.getch() if char == 79: # arrow char = self.stdscr.getch() if char == 65: # arrow up char = self.curses.KEY_UP elif char == 66: # arrow down char = self.curses.KEY_DOWN elif char == 72: char = self.curses.KEY_PPAGE elif char == 70: char = self.curses.KEY_NPAGE elif char == 91: char = self.stdscr.getch() if char == 53: char = self.stdscr.getch() if char == 126: char = self.curses.KEY_HOME except TimedOutExc: char = -1 self.handle_keys(char) + class BibSched(object): def __init__(self, debug=False): self.debug = debug self.hostname = gethostname() self.helper_modules = CFG_BIBTASK_VALID_TASKS ## All the tasks in the queue that the node is allowed to manipulate self.node_relevant_bibupload_tasks = () self.node_relevant_waiting_tasks = () self.node_relevant_active_tasks = () ## All tasks of all nodes self.active_tasks_all_nodes = () self.allowed_task_types = CFG_BIBSCHED_NODE_TASKS.get(self.hostname, CFG_BIBTASK_VALID_TASKS) os.environ['BIBSCHED_MODE'] = 'automatic' def tie_task_to_host(self, task_id): """Sets the hostname of a task to the machine executing this script @return: True if the scheduling was successful, False otherwise, e.g. if the task was scheduled concurrently on a different host. """ if not run_sql("""SELECT id FROM schTASK WHERE id=%s AND host='' AND status='WAITING'""", (task_id, )): ## The task was already tied? 
return False run_sql("""UPDATE schTASK SET host=%s, status='SCHEDULED' WHERE id=%s AND host='' AND status='WAITING'""", (self.hostname, task_id)) return bool(run_sql("SELECT id FROM schTASK WHERE id=%s AND host=%s", (task_id, self.hostname))) def filter_for_allowed_tasks(self): """ Removes all tasks that are not allowed in this Invenio instance """ n_waiting = [] n_active = [] if "bibupload" not in self.allowed_task_types: self.node_relevant_bibupload_tasks = () for task_id, proc, runtime, status, priority, host, sequenceid in self.node_relevant_waiting_tasks: procname = proc.split(':')[0] if procname in self.allowed_task_types: n_waiting.append((task_id, proc, runtime, status, priority, host, sequenceid)) for task_id, proc, runtime, status, priority, host, sequenceid in self.node_relevant_active_tasks: procname = proc.split(':')[0] if procname in self.allowed_task_types: n_active.append((task_id, proc, runtime, status, priority, host, sequenceid)) self.node_relevant_active_tasks = tuple(n_active) self.node_relevant_waiting_tasks = tuple(n_waiting) def is_task_safe_to_execute(self, proc1, proc2): """Return True when the two tasks can run concurrently.""" return proc1 != proc2 # and not proc1.startswith('bibupload') and not proc2.startswith('bibupload') def get_tasks_to_sleep_and_stop(self, proc, task_set): """Among the task_set, return the list of tasks to stop and the list of tasks to sleep. """ if proc in CFG_BIBTASK_MONOTASKS: - return [], task_set + return [], [t for t in task_set \ + if t[3] not in ('SLEEPING', 'ABOUT TO SLEEP')] min_prio = None min_task_id = None min_proc = None min_status = None min_sequenceid = None to_stop = [] ## For all the lower priority tasks... for (this_task_id, this_proc, this_priority, this_status, this_sequenceid) in task_set: if not self.is_task_safe_to_execute(this_proc, proc): to_stop.append((this_task_id, this_proc, this_priority, this_status, this_sequenceid)) elif (min_prio is None or this_priority < min_prio) and \ this_status not in ('SLEEPING', 'ABOUT TO SLEEP'): ## We don't put to sleep already sleeping task :-) min_prio = this_priority min_task_id = this_task_id min_proc = this_proc min_status = this_status min_sequenceid = this_sequenceid - if len(task_set) < CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS and not to_stop: - ## All the task are safe and there are enough resources - return [], [] + if to_stop: + return to_stop, [] + elif min_task_id: + return [], [(min_task_id, min_proc, min_prio, min_status, min_sequenceid)] else: - if to_stop: - return to_stop, [] - elif min_task_id: - return [], [(min_task_id, min_proc, min_prio, min_status, min_sequenceid)] - else: - return [], [] + return [], [] def split_active_tasks_by_priority(self, task_id, priority): """Return two lists: the list of task_ids with lower priority and those with higher or equal priority.""" higher = [] lower = [] ### !!! 
We already have this in node_relevant_active_tasks
        for other_task_id, task_proc, runtime, status, task_priority, task_host, sequenceid in self.node_relevant_active_tasks:
        # for other_task_id, task_proc, runtime, status, task_priority, task_host in self.node_relevant_active_tasks:
        # for other_task_id, task_proc, task_priority, status in self.get_running_tasks():
            if task_id == other_task_id:
                continue
            if task_priority < priority and task_host == self.hostname:
                lower.append((other_task_id, task_proc, task_priority, status, sequenceid))
            elif task_host == self.hostname:
                higher.append((other_task_id, task_proc, task_priority, status, sequenceid))
        return lower, higher

    def handle_task(self, task_id, proc, runtime, status, priority, host, sequenceid):
        """Perform the needed action for the row representing a task.
        Return True when task_status needs to be refreshed."""
        debug = self.debug
        if debug:
-            Log("task_id: %s, proc: %s, runtime: %s, status: %s, priority: %s" % (task_id, proc, runtime, status, priority))
+            Log("task_id: %s, proc: %s, runtime: %s, status: %s, priority: %s, host: %s, sequenceid: %s" %
+                (task_id, proc, runtime, status, priority, host, sequenceid))

        if (task_id, proc, runtime, status, priority, host, sequenceid) in self.node_relevant_waiting_tasks:
            if debug:
                Log("Trying to run %s" % task_id)

            if priority < -10:
                return False

            lower, higher = self.split_active_tasks_by_priority(task_id, priority)
            if debug:
                Log('lower: %s' % lower)
                Log('higher: %s' % higher)
-            for other_task_id, other_proc, other_runtime, other_status, other_priority, other_host, other_sequenceid in self.active_tasks_all_nodes:
-                if not self.is_task_safe_to_execute(proc, other_proc):
+
+            for other_task_id, other_proc, other_runtime, other_status, \
+                other_priority, other_host, other_sequenceid in chain(
+                    self.node_relevant_sleeping_tasks, self.active_tasks_all_nodes):
+                if task_id != other_task_id and \
+                        not self.is_task_safe_to_execute(proc, other_proc):
                    ### !!! WE NEED TO CHECK FOR TASKS THAT CAN ONLY BE EXECUTED ON ONE MACHINE AT ONE TIME
                    ### !!! FOR EXAMPLE BIBUPLOADS WHICH NEED TO BE EXECUTED SEQUENTIALLY AND NEVER CONCURRENTLY
                    ## There's at least a higher priority task running that
                    ## cannot run at the same time as the given task.
## We give up
                    if debug:
-                        Log("Cannot run because task_id: %s, proc: %s is the queue and incompatible" % (other_task_id, other_proc))
+                        Log("Cannot run because task_id: %s, proc: %s is in the queue and incompatible" % (other_task_id, other_proc))
                    return False

            if sequenceid:
                max_priority = run_sql("""SELECT MAX(priority) FROM schTASK
                                          WHERE status='WAITING'
                                          AND sequenceid=%s""",
                                       (sequenceid, ))[0][0]
                if run_sql("""UPDATE schTASK SET priority=%s
                              WHERE status='WAITING' AND sequenceid=%s""",
                           (max_priority, sequenceid)):
                    Log("Raised all waiting tasks with sequenceid " \
                        "%s to the max priority %s" % (sequenceid, max_priority))
                    ## Some priorities were raised
                    return False
+                current_runtimes = run_sql("""SELECT id, runtime FROM schTASK WHERE sequenceid=%s AND status='WAITING' ORDER by id""", (sequenceid, ))
+                runtimes_adjusted = False
+                if current_runtimes:
+                    last_runtime = current_runtimes[0][1]
+                    for the_task_id, runtime in current_runtimes:
+                        if runtime < last_runtime:
+                            run_sql("""UPDATE schTASK SET runtime=%s WHERE id=%s""", (last_runtime, the_task_id))
+                            if debug:
+                                Log("Adjusted runtime of task_id %s to %s in order to be executed in the correct sequenceid order" % (the_task_id, last_runtime))
+                            runtimes_adjusted = True
+                            runtime = last_runtime
+                        last_runtime = runtime
+                if runtimes_adjusted:
+                    ## Some runtimes have been adjusted
+                    return False

            for other_task_id, other_proc, other_dummy, other_status, other_sequenceid in higher + lower:
                if sequenceid is not None and \
                        sequenceid == other_sequenceid and task_id > other_task_id:
                    Log('Task %s needs to run after task %s since they have the same sequence id: %s' % (task_id, other_task_id, sequenceid))
                    ## If there is an earlier task with the same sequence id, do not run the current task
                    return False

            if proc in CFG_BIBTASK_MONOTASKS and higher:
                ## This is a monotask
                if debug:
                    Log("Cannot run because this is a monotask and there are higher priority tasks: %s" % (higher, ))
                return False

            ## No higher priority task conflicts with the given task.
            if proc not in CFG_BIBTASK_FIXEDTIMETASKS and len(higher) >= CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS:
                ### !!! THIS HAS TO BE ADAPTED FOR MULTINODE
                ### !!! Basically, the number of concurrent tasks should count per node
                ## Not enough resources.
                if debug:
-                    Log("Cannot run because all resource (%s) are used (%s), higher: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(higher), higher))
+                    Log("Cannot run because all resources (%s) are used (%s), higher: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(higher), higher))
                return False

            ## We check if it is necessary to stop/put to sleep some lower
            ## priority tasks.
            tasks_to_stop, tasks_to_sleep = self.get_tasks_to_sleep_and_stop(proc, lower)
            if debug:
                Log('tasks_to_stop: %s' % tasks_to_stop)
                Log('tasks_to_sleep: %s' % tasks_to_sleep)

            if tasks_to_stop and priority < 100:
                ## Only tasks with priority higher than 100 have the power
                ## to stop other tasks.
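A sketch of the three priority bands enforced in handle_task (the helper below is hypothetical, added only to restate in one place the rules applied above):

    def describe_priority_band(priority):
        """Hypothetical summary of the scheduling rules in handle_task."""
        if priority < -10:
            return "always postponed: never started automatically"
        elif priority < 100:
            return "normal: may wait for, but never stop, other tasks"
        else:
            return "urgent: may stop or put to sleep lower priority tasks"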
if debug:
                    Log("Cannot run because there are tasks to stop: %s and priority < 100" % tasks_to_stop)
                return False

            procname = proc.split(':')[0]
-            if not tasks_to_stop and not tasks_to_sleep:
+            if not tasks_to_stop and (not tasks_to_sleep or len(self.node_relevant_active_tasks) < CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS):
+                if proc in CFG_BIBTASK_MONOTASKS and self.node_relevant_active_tasks:
+                    if debug:
+                        Log("Cannot run because this is a monotask and there are other tasks running: %s" % (self.node_relevant_active_tasks, ))
+                    return False
+
+                if len(self.node_relevant_active_tasks) >= CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS:
+                    if debug:
+                        Log("Cannot run because all resources (%s) are used (%s), active: %s" % (CFG_BIBSCHED_MAX_NUMBER_CONCURRENT_TASKS, len(self.node_relevant_active_tasks), self.node_relevant_active_tasks))
+                    return False
+
                if status in ("SLEEPING", "ABOUT TO SLEEP"):
                    if host == self.hostname:
                        ## We can only wake up tasks that are running on our own host
                        bibsched_set_status(task_id, "CONTINUING", status)
                        if not bibsched_send_signal(proc, task_id, signal.SIGCONT):
                            bibsched_set_status(task_id, "ERROR", "CONTINUING")
                            Log("Task #%d (%s) woken up but no longer existed" % (task_id, proc))
                            return True
                        Log("Task #%d (%s) woken up" % (task_id, proc))
                        return True
                    else:
                        return False
                elif procname in self.helper_modules:
                    program = os.path.join(CFG_BINDIR, procname)
                    ## Trick to log the task exit in bibsched.log
                    exit_str = '&& echo "`date "+%%Y-%%m-%%d %%H:%%M:%%S"` --> Task #%d (%s) exited" >> %s' % (task_id, proc, os.path.join(CFG_LOGDIR, 'bibsched.log'))
                    command = "(%s %s > /dev/null 2> /dev/null %s)" % (program, str(task_id), exit_str)
                    ### Set the task to scheduled and tie it to this host
                    if self.tie_task_to_host(task_id):
                        Log("Task #%d (%s) started" % (task_id, proc))
                        ### Release the lock for the BibTask, it is safe now to do so
                        spawn_task(command, wait=proc in CFG_BIBTASK_MONOTASKS)
                        count = 10
                        while run_sql("""SELECT status FROM schTASK
                                         WHERE id=%s AND status='SCHEDULED'""",
                                      (task_id, )):
                            ## Polling to wait for the task to really start,
                            ## in order to avoid race conditions.
                            if count <= 0:
                                raise StandardError("Process %s (task_id: %s) was launched but seems not to be able to reach RUNNING status." % (proc, task_id))
                            time.sleep(CFG_BIBSCHED_REFRESHTIME)
                            count -= 1
                    return True
                else:
                    raise StandardError("%s is not in the allowed modules" % procname)
            else:
                ## It is still not safe to run the task.
## We first need to stop the tasks that should be stopped
                ## and to put to sleep the tasks that should be put to sleep
                for (other_task_id, other_proc, other_priority, other_status, other_sequenceid) in tasks_to_stop:
                    Log("Send STOP signal to #%d (%s) which was in status %s" % (other_task_id, other_proc, other_status))
                    bibsched_set_status(other_task_id, 'ABOUT TO STOP', other_status)
                for (other_task_id, other_proc, other_priority, other_status, other_sequenceid) in tasks_to_sleep:
                    Log("Send SLEEP signal to #%d (%s) which was in status %s" % (other_task_id, other_proc, other_status))
                    bibsched_set_status(other_task_id, 'ABOUT TO SLEEP', other_status)
                time.sleep(CFG_BIBSCHED_REFRESHTIME)
                return True

    def watch_loop(self):
        def check_errors():
            sql = "SELECT count(id) FROM schTASK WHERE status='ERROR'" \
                  " OR status='DONE WITH ERRORS' OR STATUS='CERROR'"
            if run_sql(sql)[0][0] > 0:
                errors = run_sql("""SELECT id,proc,status FROM schTASK
                                    WHERE status = 'ERROR'
                                    OR status = 'DONE WITH ERRORS'
                                    OR status = 'CERROR'""")
                msg_errors = [" #%s %s -> %s" % row for row in errors]
                msg = 'BibTask with ERRORS:\n%s' % "\n".join(msg_errors)
                err_types = set(e[2] for e in errors if e[2])
                if 'ERROR' in err_types or 'DONE WITH ERRORS' in err_types:
                    raise StandardError(msg)
                else:
                    raise RecoverableError(msg)

        def calculate_rows():
            """Return all the node_relevant_active_tasks to work on."""
            try:
                check_errors()
            except RecoverableError, msg:
                register_emergency('Light emergency from %s: BibTask failed: %s' % (CFG_SITE_URL, msg))
                run_sql("UPDATE schTASK SET status='ERRORS REPORTED' WHERE status='CERROR'")

-            max_bibupload_priority = run_sql("SELECT max(priority) FROM schTASK WHERE status='WAITING' AND proc='bibupload' AND runtime<=NOW()")
+            max_bibupload_priority = run_sql(
+                """SELECT MAX(priority)
+                   FROM schTASK
+                   WHERE status IN ('WAITING', 'RUNNING', 'SLEEPING',
+                                    'ABOUT TO STOP', 'ABOUT TO SLEEP',
+                                    'SCHEDULED', 'CONTINUING')
+                   AND proc = 'bibupload'
+                   AND runtime <= NOW()""")
            if max_bibupload_priority:
                run_sql(
-                    """UPDATE schTASK SET priority=%s
-                       WHERE status='WAITING' AND proc='bibupload'
-                       AND runtime<=NOW()""", (max_bibupload_priority[0][0], ))
+                    """UPDATE schTASK SET priority = %s
+                       WHERE status IN ('WAITING', 'RUNNING', 'SLEEPING',
+                                        'ABOUT TO STOP', 'ABOUT TO SLEEP',
+                                        'SCHEDULED', 'CONTINUING')
+                       AND proc = 'bibupload'
+                       AND runtime <= NOW()
+                       AND priority < %s""", (max_bibupload_priority[0][0],
+                                              max_bibupload_priority[0][0]))
            ## The bibupload tasks are sorted by id, i.e. in the order they were scheduled
            self.node_relevant_bibupload_tasks = run_sql(
                """SELECT id, proc, runtime, status, priority, host, sequenceid
-                   FROM schTASK WHERE status = 'WAITING'
+                   FROM schTASK WHERE status IN ('WAITING', 'SLEEPING')
                   AND proc = 'bibupload'
                   AND runtime <= NOW()
                   ORDER BY id ASC LIMIT 1""", n=1)
            ## The other tasks are sorted by priority
            self.node_relevant_waiting_tasks = run_sql(
                """SELECT id, proc, runtime, status, priority, host, sequenceid
                   FROM schTASK WHERE (status='WAITING' AND runtime <= NOW())
                   OR status = 'SLEEPING'
                   ORDER BY priority DESC, runtime ASC, id ASC""")
+            self.node_relevant_sleeping_tasks = run_sql(
+                """SELECT id, proc, runtime, status, priority, host, sequenceid
+                   FROM schTASK WHERE status = 'SLEEPING'
+                   ORDER BY priority DESC, runtime ASC, id ASC""")
            self.node_relevant_active_tasks = run_sql(
                """SELECT id, proc, runtime, status, priority, host, sequenceid
                   FROM schTASK WHERE status IN ('RUNNING', 'CONTINUING',
                                                 'SCHEDULED', 'ABOUT TO STOP',
                                                 'ABOUT TO SLEEP')""")
            self.active_tasks_all_nodes =
tuple(self.node_relevant_active_tasks) ## Remove tasks that can not be executed on this host self.filter_for_allowed_tasks() ## Cleaning up scheduled task not run because of bibsched being ## interrupted in the middle. run_sql("UPDATE schTASK SET status='WAITING' WHERE status='SCHEDULED'") try: while True: #Log("New bibsched cycle") calculate_rows() ## Let's first handle running node_relevant_active_tasks. for task in self.node_relevant_active_tasks: if self.handle_task(*task): break else: # If nothing has changed we can go on to run tasks. for task in self.node_relevant_waiting_tasks: if task[1] == 'bibupload' and self.node_relevant_bibupload_tasks: ## We switch in bibupload serial mode! ## which means we execute the first next bibupload. if self.handle_task(*self.node_relevant_bibupload_tasks[0]): ## Something has changed break elif self.handle_task(*task): ## Something has changed break else: time.sleep(CFG_BIBSCHED_REFRESHTIME) except Exception, err: register_exception(alert_admin=True) try: register_emergency('Emergency from %s: BibSched halted: %s' % (CFG_SITE_URL, err)) except NotImplementedError: pass raise class TimedOutExc(Exception): def __init__(self, value="Timed Out"): Exception.__init__(self) self.value = value def __str__(self): return repr(self.value) def timed_out(f, timeout, *args, **kwargs): def handler(signum, frame): raise TimedOutExc() old = signal.signal(signal.SIGALRM, handler) signal.alarm(timeout) try: result = f(*args, **kwargs) finally: signal.signal(signal.SIGALRM, old) signal.alarm(0) return result def Log(message): log = open(CFG_LOGDIR + "/bibsched.log", "a") log.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) log.write(message) log.write("\n") log.close() def redirect_stdout_and_stderr(): "This function redirects stdout and stderr to bibsched.log and bibsched.err file." old_stdout = sys.stdout old_stderr = sys.stderr sys.stdout = open(CFG_LOGDIR + "/bibsched.log", "a") sys.stderr = open(CFG_LOGDIR + "/bibsched.err", "a") return old_stdout, old_stderr + def restore_stdout_and_stderr(stdout, stderr): sys.stdout = stdout sys.stderr = stderr def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("""\ Usage: %s [options] [start|stop|restart|monitor|status] The following commands are available for bibsched: start start bibsched in background stop stop running bibtasks and the bibsched daemon safely halt halt running bibsched while keeping bibtasks running restart restart running bibsched monitor enter the interactive monitor status get report about current status of the queue purge purge the scheduler queue from old tasks General options: -h, --help \t Print this help. -V, --version \t Print version information. 
-q, --quiet \t Quiet mode
-d, --debug \t Write debugging information in bibsched.log

Status options:
-s, --status=LIST\t Which BibTask status should be considered (default is Running,Waiting)
-S, --since=TIME\t How far back in time to consider tasks, e.g. 30m, 2h, 1d (default is all)
-t, --tasks=LIST\t Comma-separated list of BibTasks to consider (default
\t is all)

Purge options:
-s, --status=LIST\t Which BibTask status should be considered (default is DONE)
-S, --since=TIME\t How far back in time to consider tasks, e.g. 30m, 2h, 1d (default is %s days)
-t, --tasks=LIST\t Comma-separated list of BibTasks to consider (default
\t is %s)
""" % (sys.argv[0], CFG_BIBSCHED_GC_TASKS_OLDER_THAN,
       ','.join(CFG_BIBSCHED_GC_TASKS_TO_REMOVE + CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE)))
    sys.exit(exitcode)

pidfile = os.path.join(CFG_PREFIX, 'var', 'run', 'bibsched.pid')

def error(msg):
    print >> sys.stderr, "error: %s" % msg
    sys.exit(1)

def warning(msg):
    print >> sys.stderr, "warning: %s" % msg

def server_pid(ping_the_process=True, check_is_really_bibsched=True):
    # The pid must be stored on the filesystem
    try:
        pid = int(open(pidfile).read())
    except IOError:
        return None

    if ping_the_process:
        # Even if the pid is available, we check if it corresponds to an
        # actual process, as it might have been killed externally
        try:
            os.kill(pid, signal.SIGCONT)
        except OSError:
            warning("pidfile %s found referring to pid %s which is not running" % (pidfile, pid))
            return None

    if check_is_really_bibsched:
        output = run_shell_command("ps p %s -o args=", (str(pid), ))[1]
        if 'bibsched' not in output:
            warning("pidfile %s found referring to pid %s which does not correspond to bibsched: cmdline is %s" % (pidfile, pid, output))
            return None

    return pid

+
def start(verbose=True, debug=False):
    """ Fork this process in the background and start processing
    requests. The process PID is stored in a pid file, so that it can
    be stopped later on."""
    if verbose:
        sys.stdout.write("starting bibsched: ")
        sys.stdout.flush()

    pid = server_pid(ping_the_process=False)
    if pid:
        pid2 = server_pid()
        if pid2:
            error("another instance of bibsched (pid %d) is running" % pid2)
        else:
            warning("%s exists but the corresponding bibsched (pid %s) seems not to be running" % (pidfile, pid))
            warning("erasing %s and continuing..." % (pidfile, ))
            os.remove(pidfile)

    # start the child process using the "double fork" technique
    pid = os.fork()
    if pid > 0:
        sys.exit(0)

    os.setsid()
    os.chdir('/')

    pid = os.fork()
    if pid > 0:
        if verbose:
            sys.stdout.write('pid %d\n' % pid)
        Log("daemon started (pid %d)" % pid)
        open(pidfile, 'w').write('%d' % pid)
        return

    sys.stdin.close()
    redirect_stdout_and_stderr()

    sched = BibSched(debug=debug)
    try:
        sched.watch_loop()
    finally:
        try:
            os.remove(pidfile)
        except OSError:
            pass

def halt(verbose=True, soft=False, debug=False):
    pid = server_pid()
    if not pid:
        if soft:
            print >> sys.stderr, 'bibsched seems not to be running.'
            return
        else:
            error('bibsched seems not to be running.')

    try:
        os.kill(pid, signal.SIGKILL)
    except OSError:
        print >> sys.stderr, 'no bibsched process found'

    Log("daemon stopped (pid %d)" % pid)

    if verbose:
        print "stopping bibsched: pid %d" % pid
    os.unlink(pidfile)

+
def monitor(verbose=True, debug=False):
    old_stdout, old_stderr = redirect_stdout_and_stderr()
    try:
        Manager(old_stdout)
    finally:
        restore_stdout_and_stderr(old_stdout, old_stderr)

+
def write_message(msg, stream=None, verbose=1):
    """Write message and flush output stream (may be sys.stdout or sys.stderr).
Useful for debugging stuff.""" if stream is None: stream = sys.stdout if msg: if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) try: stream.write("%s\n" % msg) except UnicodeEncodeError: stream.write("%s\n" % msg.encode('ascii', 'backslashreplace')) stream.flush() else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) def report_queue_status(verbose=True, status=None, since=None, tasks=None): """ Report about the current status of BibSched queue on standard output. """ def report_about_processes(status='RUNNING', since=None, tasks=None): """ Helper function to report about processes with the given status. """ if tasks is None: task_query = '' else: task_query = 'AND proc IN (%s)' % ( ','.join([repr(real_escape_string(task)) for task in tasks])) if since is None: since_query = '' else: # We're not interested in future task if since.startswith('+') or since.startswith('-'): since = since[1:] since = '-' + since since_query = "AND runtime >= '%s'" % get_datetime(since) res = run_sql("""SELECT id, proc, user, runtime, sleeptime, status, progress, priority FROM schTASK WHERE status=%%s %(task_query)s %(since_query)s ORDER BY id ASC""" % { 'task_query' : task_query, 'since_query' : since_query }, (status,)) write_message("%s processes: %d" % (status, len(res))) for (proc_id, proc_proc, proc_user, proc_runtime, proc_sleeptime, proc_status, proc_progress, proc_priority) in res: write_message(' * ID="%s" PRIORITY="%s" PROC="%s" USER="%s" ' \ 'RUNTIME="%s" SLEEPTIME="%s" STATUS="%s" ' \ 'PROGRESS="%s"' % (proc_id, proc_priority, proc_proc, proc_user, proc_runtime, proc_sleeptime, proc_status, proc_progress)) return write_message("BibSched queue status report for %s:" % gethostname()) mode = server_pid() and "AUTOMATIC" or "MANUAL" write_message("BibSched queue running mode: %s" % mode) if status is None: report_about_processes('Running', since, tasks) report_about_processes('Waiting', since, tasks) else: for state in status: report_about_processes(state, since, tasks) write_message("Done.") -def restart(verbose = True, debug=False): + +def restart(verbose=True, debug=False): halt(verbose, soft=True, debug=debug) start(verbose, debug=debug) + def stop(verbose=True, debug=False): """ * Stop bibsched * Send stop signal to all the running tasks * wait for all the tasks to stop * return """ if verbose: print "Stopping BibSched if running" halt(verbose, soft=True, debug=debug) run_sql("UPDATE schTASK SET status='WAITING' WHERE status='SCHEDULED'") res = run_sql("""SELECT id, proc, status FROM schTASK WHERE status NOT LIKE 'DONE' AND status NOT LIKE '%_DELETED' AND (status='RUNNING' OR status='ABOUT TO STOP' OR status='ABOUT TO SLEEP' OR status='SLEEPING' OR status='CONTINUING')""") if verbose: print "Stopping all running BibTasks" for task_id, proc, status in res: if status == 'SLEEPING': bibsched_send_signal(proc, task_id, signal.SIGCONT) time.sleep(CFG_BIBSCHED_REFRESHTIME) bibsched_set_status(task_id, 'ABOUT TO STOP') while run_sql("""SELECT id FROM schTASK WHERE status NOT LIKE 'DONE' AND status NOT LIKE '%_DELETED' AND (status='RUNNING' OR status='ABOUT TO STOP' OR status='ABOUT TO SLEEP' OR status='SLEEPING' OR status='CONTINUING')"""): if verbose: sys.stdout.write('.') sys.stdout.flush() time.sleep(CFG_BIBSCHED_REFRESHTIME) if verbose: print "\nStopped" Log("BibSched and all BibTasks stopped") def main(): from invenio.bibtask import check_running_process_user check_running_process_user() 
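By way of illustration (a hypothetical invocation, not part of the patch), the option parsing below maps a command line such as "bibsched status -s DONE -S 2d -t bibupload" onto:

    report_queue_status(verbose=True, status=['DONE'], since='2d', tasks=['bibupload'])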
verbose = True
    status = None
    since = None
    tasks = None
    debug = False

    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "hVdqS:s:t:", [
            "help", "version", "debug", "quiet", "since=", "status=", "task="])
    except getopt.GetoptError, err:
        Log("Error: %s" % err)
        usage(1, err)

    for opt, arg in opts:
        if opt in ["-h", "--help"]:
            usage(0)
        elif opt in ["-V", "--version"]:
            print __revision__
            sys.exit(0)
        elif opt in ['-q', '--quiet']:
            verbose = False
        elif opt in ['-s', '--status']:
            status = arg.split(',')
        elif opt in ['-S', '--since']:
            since = arg
        elif opt in ['-t', '--task']:
            tasks = arg.split(',')
        elif opt in ['-d', '--debug']:
            debug = True
        else:
            usage(1)

    try:
        cmd = args[0]
    except IndexError:
        cmd = 'monitor'

    try:
        if cmd in ('status', 'purge'):
            {'status' : report_queue_status,
             'purge' : gc_tasks,
            }[cmd](verbose, status, since, tasks)
        else:
            {'start': start,
             'halt': halt,
             'stop': stop,
             'restart': restart,
             'monitor': monitor}[cmd](verbose=verbose, debug=debug)
    except KeyError:
        usage(1, 'unknown command: %s' % cmd)

if __name__ == '__main__':
    main()
diff --git a/modules/bibsched/lib/bibtask.py b/modules/bibsched/lib/bibtask.py
index 822af1529..5d7cd3f73 100644
--- a/modules/bibsched/lib/bibtask.py
+++ b/modules/bibsched/lib/bibtask.py
@@ -1,1107 +1,1110 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""Invenio Bibliographic Task Class.

BibTask class.

A BibTask is an executable under CFG_BINDIR, whose name is stored in
bibtask_config.CFG_BIBTASK_VALID_TASKS.
A valid task must call the task_init function with the proper parameters.
Generic task-related parameters (user, sleeptime, runtime, task_id, task_name,
verbose) go to the _TASK_PARAMS global dictionary accessible through
task_get_task_param. Options specific to the particular BibTask go to the
_OPTIONS global dictionary and are accessible via
task_get_option/task_set_option.

In order to log something properly, just use write_message(s) with the desired
verbose level.

task_update_status and task_update_progress can be used to update the status
of the task (DONE, FAILED, DONE WITH ERRORS...) and its progress
(1 out of 100...) within the bibsched monitor.

It is possible to enqueue a BibTask via API call by means of
task_low_level_submission.
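For example, the core function of a task might report on itself like this
(an illustrative sketch; recid, done and total are hypothetical variables):

    write_message("Processing record %s" % recid, verbose=3)
    task_update_progress("Done %d out of %d." % (done, total))
    task_update_status("DONE")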
""" __revision__ = "$Id$" import getopt import getpass import marshal import os import pwd import re import signal import sys import time import datetime import traceback import logging import logging.handlers import random from invenio.dbquery import run_sql, _db_login from invenio.access_control_engine import acc_authorize_action from invenio.config import CFG_PREFIX, CFG_BINDIR, CFG_LOGDIR, \ CFG_BIBSCHED_PROCESS_USER, CFG_TMPDIR from invenio.errorlib import register_exception from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO, \ CFG_EXTERNAL_AUTHENTICATION from invenio.webuser import get_user_preferences, get_email from invenio.bibtask_config import CFG_BIBTASK_VALID_TASKS, \ CFG_BIBTASK_DEFAULT_TASK_SETTINGS, CFG_BIBTASK_FIXEDTIMETASKS from invenio.dateutils import parse_runtime_limit from invenio.shellutils import escape_shell_arg # Global _TASK_PARAMS dictionary. _TASK_PARAMS = { 'version': '', 'task_stop_helper_fnc': None, 'task_name': os.path.basename(sys.argv[0]), 'task_specific_name': '', 'task_id': 0, 'user': '', # If the task is not initialized (usually a developer debugging # a single method), output all messages. 'verbose': 9, 'sleeptime': '', 'runtime': time.strftime("%Y-%m-%d %H:%M:%S"), 'priority': 0, 'runtime_limit': None, 'profile': [], 'post-process': [], 'sequence-id':None, 'stop_queue_on_error': False, 'fixed_time': False, } # Global _OPTIONS dictionary. _OPTIONS = {} # Which tasks don't need to ask the user for authorization? CFG_VALID_PROCESSES_NO_AUTH_NEEDED = ("bibupload", ) CFG_TASK_IS_NOT_A_DEAMON = ("bibupload", ) def fix_argv_paths(paths, argv=None): """Given the argv vector of cli parameters, and a list of path that can be relative and may have been specified within argv, it substitute all the occurencies of these paths in argv. argv is changed in place and returned. """ if argv is None: argv = sys.argv for path in paths: for count in xrange(len(argv)): if path == argv[count]: argv[count] = os.path.abspath(path) return argv def task_low_level_submission(name, user, *argv): """Let special lowlevel enqueuing of a task on the bibsche queue. @param name: is the name of the bibtask. It must be a valid executable under C{CFG_BINDIR}. @type name: string @param user: is a string that will appear as the "user" submitting the task. Since task are submitted via API it make sense to set the user to the name of the module/function that called task_low_level_submission. @type user: string @param argv: are all the additional CLI parameters that would have been passed on the CLI (one parameter per variable). e.g.: >>> task_low_level_submission('bibupload', 'admin', '-a', '/tmp/z.xml') @type: strings @return: the task identifier when the task is correctly enqueued. 
@rtype: int
    @note: use absolute paths in argv
    """
    def get_priority(argv):
        """Try to get the priority by analysing the arguments."""
        priority = 0
        argv = list(argv)
        while True:
            try:
                opts, args = getopt.gnu_getopt(argv, 'P:', ['priority='])
            except getopt.GetoptError, err:
                ## We remove one by one all the unrecognized parameters
                if len(err.opt) > 1:
                    argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
                else:
                    argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
            else:
                break
        for opt in opts:
            if opt[0] in ('-P', '--priority'):
                try:
                    priority = int(opt[1])
                except ValueError:
                    pass
        return priority

    def get_special_name(argv):
        """Try to get the special name by analysing the arguments."""
        special_name = ''
        argv = list(argv)
        while True:
            try:
                opts, args = getopt.gnu_getopt(argv, 'N:', ['name='])
            except getopt.GetoptError, err:
                ## We remove one by one all the unrecognized parameters
                if len(err.opt) > 1:
                    argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
                else:
                    argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
            else:
                break
        for opt in opts:
            if opt[0] in ('-N', '--name'):
                special_name = opt[1]
        return special_name

    def get_runtime(argv):
        """Try to get the runtime by analysing the arguments."""
        runtime = time.strftime("%Y-%m-%d %H:%M:%S")
        argv = list(argv)
        while True:
            try:
                opts, args = getopt.gnu_getopt(argv, 't:', ['runtime='])
            except getopt.GetoptError, err:
                ## We remove one by one all the unrecognized parameters
                if len(err.opt) > 1:
                    argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
                else:
                    argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
            else:
                break
        for opt in opts:
            if opt[0] in ('-t', '--runtime'):
                try:
                    runtime = get_datetime(opt[1])
                except ValueError:
                    pass
        return runtime

    def get_sleeptime(argv):
        """Try to get the sleeptime by analysing the arguments."""
        sleeptime = ""
        argv = list(argv)
        while True:
            try:
                opts, args = getopt.gnu_getopt(argv, 's:', ['sleeptime='])
            except getopt.GetoptError, err:
                ## We remove one by one all the unrecognized parameters
                if len(err.opt) > 1:
                    argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
                else:
                    argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
            else:
                break
        for opt in opts:
            if opt[0] in ('-s', '--sleeptime'):
                try:
                    sleeptime = opt[1]
                except ValueError:
                    pass
        return sleeptime

    def get_sequenceid(argv):
        """Try to get the sequenceid by analysing the arguments."""
        sequenceid = None
        argv = list(argv)
        while True:
            try:
                opts, args = getopt.gnu_getopt(argv, 'I:', ['sequence-id='])
            except getopt.GetoptError, err:
                ## We remove one by one all the unrecognized parameters
                if len(err.opt) > 1:
                    argv = [arg for arg in argv if arg != '--%s' % err.opt and not arg.startswith('--%s=' % err.opt)]
                else:
                    argv = [arg for arg in argv if not arg.startswith('-%s' % err.opt)]
            else:
                break
        for opt in opts:
            if opt[0] in ('-I', '--sequence-id'):
                try:
                    sequenceid = opt[1]
                except ValueError:
                    pass
        return sequenceid

    task_id = None
    try:
        if not name in CFG_BIBTASK_VALID_TASKS:
            raise StandardError('%s is not a valid task name' % name)

        new_argv = []
        for arg in argv:
            if isinstance(arg, unicode):
                arg = arg.encode('utf8')
            new_argv.append(arg)
        argv = new_argv
        priority = get_priority(argv)
        special_name = get_special_name(argv)
        runtime = get_runtime(argv)
        sleeptime = get_sleeptime(argv)
        sequenceid = get_sequenceid(argv)
        argv = tuple([os.path.join(CFG_BINDIR, name)] +
list(argv))
        if special_name:
            name = '%s:%s' % (name, special_name)

        verbose_argv = 'Will execute: %s' % ' '.join([escape_shell_arg(str(arg)) for arg in argv])

        ## submit task:
        task_id = run_sql("""INSERT INTO schTASK (proc,user,
            runtime,sleeptime,status,progress,arguments,priority,sequenceid)
            VALUES (%s,%s,%s,%s,'WAITING',%s,%s,%s,%s)""",
            (name, user, runtime, sleeptime, verbose_argv, marshal.dumps(argv), priority, sequenceid))
    except Exception:
        register_exception(alert_admin=True)
        if task_id:
            run_sql("""DELETE FROM schTASK WHERE id=%s""", (task_id, ))
        raise
    return task_id

def bibtask_allocate_sequenceid(curdir=None):
    """
    Returns an almost unique number to be used as a task sequence ID.

    In WebSubmit functions, set C{curdir} to the curdir (!) to read
    the shared sequence ID for all functions of this submission (reading
    "access number").

    @param curdir: in WebSubmit functions (ONLY) the value retrieved
        from the curdir parameter of the function
    @return: an integer for the sequence ID. 0 is returned if the
        sequence ID could not be allocated
    @rtype: int
    """
    if curdir:
        try:
            fd = file(os.path.join(curdir, 'access'), "r")
            access = fd.readline().strip()
            fd.close()
            return access.replace("_", "")[-9:]
        except:
            return 0
    else:
        return random.randrange(1, 4294967296)

def setup_loggers(task_id=None):
    """Sets up the logging system."""
    logger = logging.getLogger()
    for handler in logger.handlers:
        ## Let's clean the handlers in case some piece of code has already
        ## fired any write_message, i.e. any call to debug, info, etc.
        ## which triggered a call to logging.basicConfig()
        logger.removeHandler(handler)
    formatter = logging.Formatter('%(asctime)s --> %(message)s', '%Y-%m-%d %H:%M:%S')
    if task_id is not None:
        err_logger = logging.handlers.RotatingFileHandler(os.path.join(CFG_LOGDIR, 'bibsched_task_%d.err' % _TASK_PARAMS['task_id']), 'a', 1*1024*1024, 10)
        log_logger = logging.handlers.RotatingFileHandler(os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % _TASK_PARAMS['task_id']), 'a', 1*1024*1024, 10)
        log_logger.setFormatter(formatter)
        log_logger.setLevel(logging.DEBUG)
        err_logger.setFormatter(formatter)
        err_logger.setLevel(logging.WARNING)
        logger.addHandler(err_logger)
        logger.addHandler(log_logger)
    stdout_logger = logging.StreamHandler(sys.stdout)
    stdout_logger.setFormatter(formatter)
    stdout_logger.setLevel(logging.DEBUG)
    stderr_logger = logging.StreamHandler(sys.stderr)
    stderr_logger.setFormatter(formatter)
    stderr_logger.setLevel(logging.WARNING)
    logger.addHandler(stderr_logger)
    logger.addHandler(stdout_logger)
    logger.setLevel(logging.INFO)
    return logger

def task_init(
        authorization_action="",
        authorization_msg="",
        description="",
        help_specific_usage="",
        version=__revision__,
        specific_params=("", []),
        task_stop_helper_fnc=None,
        task_submit_elaborate_specific_parameter_fnc=None,
        task_submit_check_options_fnc=None,
        task_run_fnc=None):
    """
    Initialize a BibTask.
    @param authorization_action: is the name of the authorization action
        connected with this task;
    @param authorization_msg: is the header printed when asking for an
        authorization password;
    @param description: is the generic description printed in the usage page;
    @param help_specific_usage: is the specific parameter help
    @param task_stop_helper_fnc: is a function that will be called
        whenever the task is stopped
    @param task_submit_elaborate_specific_parameter_fnc: will be called passing
        a key and a value, for parsing specific cli parameters. Must return True if
        it has recognized the parameter.
Must eventually update the options with bibtask_set_option; @param task_submit_check_options: must check the validity of options (via bibtask_get_option) once all the options where parsed; @param task_run_fnc: will be called as the main core function. Must return False in case of errors. """ global _TASK_PARAMS, _OPTIONS _TASK_PARAMS = { "version" : version, "task_stop_helper_fnc" : task_stop_helper_fnc, "task_name" : os.path.basename(sys.argv[0]), "task_specific_name" : '', "user" : '', "verbose" : 1, "sleeptime" : '', "runtime" : time.strftime("%Y-%m-%d %H:%M:%S"), "priority" : 0, "runtime_limit" : None, "profile" : [], "post-process": [], "sequence-id": None, "stop_queue_on_error": False, "fixed_time": False, } to_be_submitted = True if len(sys.argv) == 2 and sys.argv[1].isdigit(): _TASK_PARAMS['task_id'] = int(sys.argv[1]) argv = _task_get_options(_TASK_PARAMS['task_id'], _TASK_PARAMS['task_name']) to_be_submitted = False else: argv = sys.argv setup_loggers(_TASK_PARAMS.get('task_id')) task_name = os.path.basename(sys.argv[0]) if task_name not in CFG_BIBTASK_VALID_TASKS or os.path.realpath(os.path.join(CFG_BINDIR, task_name)) != os.path.realpath(sys.argv[0]): raise OSError("%s is not in the allowed modules" % sys.argv[0]) from invenio.errorlib import wrap_warn wrap_warn() if type(argv) is dict: # FIXME: REMOVE AFTER MAJOR RELEASE 1.0 # This is needed for old task submitted before CLI parameters # where stored in DB and _OPTIONS dictionary was stored instead. _OPTIONS = argv else: try: _task_build_params(_TASK_PARAMS['task_name'], argv, description, help_specific_usage, version, specific_params, task_submit_elaborate_specific_parameter_fnc, task_submit_check_options_fnc) except (SystemExit, Exception), err: if not to_be_submitted: register_exception(alert_admin=True) write_message("Error in parsing the parameters: %s." % err, sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") raise write_message('argv=%s' % (argv, ), verbose=9) write_message('_OPTIONS=%s' % (_OPTIONS, ), verbose=9) write_message('_TASK_PARAMS=%s' % (_TASK_PARAMS, ), verbose=9) if to_be_submitted: _task_submit(argv, authorization_action, authorization_msg) else: try: if task_get_task_param('profile'): try: from cStringIO import StringIO import pstats filename = os.path.join(CFG_TMPDIR, 'bibsched_task_%s.pyprof' % _TASK_PARAMS['task_id']) existing_sorts = pstats.Stats.sort_arg_dict_default.keys() required_sorts = [] profile_dump = [] for sort in task_get_task_param('profile'): if sort not in existing_sorts: sort = 'cumulative' if sort not in required_sorts: required_sorts.append(sort) if sys.hexversion < 0x02050000: import hotshot import hotshot.stats pr = hotshot.Profile(filename) ret = pr.runcall(_task_run, task_run_fnc) for sort_type in required_sorts: tmp_out = sys.stdout sys.stdout = StringIO() hotshot.stats.load(filename).strip_dirs().sort_stats(sort_type).print_stats() # pylint: disable=E1103 # This is a hack. sys.stdout is a StringIO in this case. 
profile_dump.append(sys.stdout.getvalue()) # pylint: enable=E1103 sys.stdout = tmp_out else: import cProfile pr = cProfile.Profile() ret = pr.runcall(_task_run, task_run_fnc) pr.dump_stats(filename) for sort_type in required_sorts: strstream = StringIO() pstats.Stats(filename, stream=strstream).strip_dirs().sort_stats(sort_type).print_stats() profile_dump.append(strstream.getvalue()) profile_dump = '\n'.join(profile_dump) profile_dump += '\nYou can use profile=%s' % existing_sorts open(os.path.join(CFG_LOGDIR, 'bibsched_task_%d.log' % _TASK_PARAMS['task_id']), 'a').write("%s" % profile_dump) os.remove(filename) except ImportError: ret = _task_run(task_run_fnc) write_message("ERROR: The Python Profiler is not installed!", stream=sys.stderr) else: ret = _task_run(task_run_fnc) if not ret: write_message("Error occurred. Exiting.", sys.stderr) except Exception, e: register_exception(alert_admin=True) write_message("Unexpected error occurred: %s." % e, sys.stderr) write_message("Traceback is:", sys.stderr) write_messages(''.join(traceback.format_tb(sys.exc_info()[2])), sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") logging.shutdown() def _task_build_params( task_name, argv, description="", help_specific_usage="", version=__revision__, specific_params=("", []), task_submit_elaborate_specific_parameter_fnc=None, task_submit_check_options_fnc=None): """ Build the BibTask params. @param argv: a list of string as in sys.argv @param description: is the generic description printed in the usage page; @param help_specific_usage: is the specific parameter help @param task_submit_elaborate_specific_parameter_fnc: will be called passing a key and a value, for parsing specific cli parameters. Must return True if it has recognized the parameter. 
Must eventually update the options with bibtask_set_option; @param task_submit_check_options: must check the validity of options (via bibtask_get_option) once all the options where parsed; """ global _OPTIONS _OPTIONS = {} if task_name in CFG_BIBTASK_DEFAULT_TASK_SETTINGS: _OPTIONS.update(CFG_BIBTASK_DEFAULT_TASK_SETTINGS[task_name]) # set user-defined options: try: (short_params, long_params) = specific_params opts, args = getopt.gnu_getopt(argv[1:], "hVv:u:s:t:P:N:L:I:" + short_params, [ "help", "version", "verbose=", "user=", "sleep=", "runtime=", "priority=", "name=", "limit=", "profile=", "post-process=", "sequence-id=", "stop-on-error", "continue-on-error", "fixed-time", ] + long_params) except getopt.GetoptError, err: _usage(1, err, help_specific_usage=help_specific_usage, description=description) try: for opt in opts: if opt[0] in ("-h", "--help"): _usage(0, help_specific_usage=help_specific_usage, description=description) elif opt[0] in ("-V", "--version"): print _TASK_PARAMS["version"] sys.exit(0) elif opt[0] in ("-u", "--user"): _TASK_PARAMS["user"] = opt[1] elif opt[0] in ("-v", "--verbose"): _TASK_PARAMS["verbose"] = int(opt[1]) elif opt[0] in ("-s", "--sleeptime"): if task_name not in CFG_TASK_IS_NOT_A_DEAMON: get_datetime(opt[1]) # see if it is a valid shift _TASK_PARAMS["sleeptime"] = opt[1] elif opt[0] in ("-t", "--runtime"): _TASK_PARAMS["runtime"] = get_datetime(opt[1]) elif opt[0] in ("-P", "--priority"): _TASK_PARAMS["priority"] = int(opt[1]) elif opt[0] in ("-N", "--name"): _TASK_PARAMS["task_specific_name"] = opt[1] elif opt[0] in ("-L", "--limit"): _TASK_PARAMS["runtime_limit"] = parse_runtime_limit(opt[1]) elif opt[0] in ("--profile", ): _TASK_PARAMS["profile"] += opt[1].split(',') elif opt[0] in ("--post-process", ): _TASK_PARAMS["post-process"] += [opt[1]]; elif opt[0] in ("-I","--sequence-id"): _TASK_PARAMS["sequence-id"] = opt[1] elif opt[0] in ("--stop-on-error", ): _TASK_PARAMS["stop_queue_on_error"] = True elif opt[0] in ("--continue-on-error", ): _TASK_PARAMS["stop_queue_on_error"] = False elif opt[0] in ("--fixed-time", ): _TASK_PARAMS["fixed_time"] = True elif not callable(task_submit_elaborate_specific_parameter_fnc) or \ not task_submit_elaborate_specific_parameter_fnc(opt[0], opt[1], opts, args): _usage(1, help_specific_usage=help_specific_usage, description=description) except StandardError, e: _usage(e, help_specific_usage=help_specific_usage, description=description) if callable(task_submit_check_options_fnc): if not task_submit_check_options_fnc(): _usage(1, help_specific_usage=help_specific_usage, description=description) def task_set_option(key, value): """Set an value to key in the option dictionary of the task""" global _OPTIONS try: _OPTIONS[key] = value except NameError: _OPTIONS = {key : value} def task_get_option(key, default=None): """Returns the value corresponding to key in the option dictionary of the task""" try: return _OPTIONS.get(key, default) except NameError: return default def task_has_option(key): """Map the has_key query to _OPTIONS""" try: return _OPTIONS.has_key(key) except NameError: return False def task_get_task_param(key, default=None): """Returns the value corresponding to the particular task param""" try: return _TASK_PARAMS.get(key, default) except NameError: return default def task_set_task_param(key, value): """Set the value corresponding to the particular task param""" global _TASK_PARAMS try: _TASK_PARAMS[key] = value except NameError: _TASK_PARAMS = {key : value} def task_update_progress(msg): """Updates progress 
information in the BibSched task table.""" write_message("Updating task progress to %s." % msg, verbose=9) if "task_id" in _TASK_PARAMS: return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, _TASK_PARAMS["task_id"])) def task_update_status(val): """Updates status information in the BibSched task table.""" write_message("Updating task status to %s." % val, verbose=9) if "task_id" in _TASK_PARAMS: return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, _TASK_PARAMS["task_id"])) def task_read_status(): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (_TASK_PARAMS['task_id'],), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def write_messages(msgs, stream=None, verbose=1): """Write many messages through write_message""" if stream is None: stream = sys.stdout for msg in msgs.split('\n'): write_message(msg, stream, verbose) def write_message(msg, stream=None, verbose=1): """Write message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream is None: stream = sys.stdout if msg and _TASK_PARAMS['verbose'] >= verbose: if stream == sys.stdout: logging.info(msg) elif stream == sys.stderr: logging.error(msg) else: sys.stderr.write("Unknown stream %s. [must be sys.stdout or sys.stderr]\n" % stream) else: logging.debug(msg) _RE_SHIFT = re.compile("([-\+]{0,1})([\d]+)([dhms])") def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S", now=None): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = now or datetime.datetime.now() factors = {"d": 24 * 3600, "h": 3600, "m": 60, "s": 1} m = _RE_SHIFT.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) delta = sign * factor * value while delta > 0 and date < datetime.datetime.now(): date = date + datetime.timedelta(seconds=delta) date = date.strftime(format_string) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def task_sleep_now_if_required(can_stop_too=False): """This function should be called during safe state of BibTask, e.g. after flushing caches or outside of run_sql calls. """ status = task_read_status() write_message('Entering task_sleep_now_if_required with status=%s' % status, verbose=9) if status == 'ABOUT TO SLEEP': write_message("sleeping...") task_update_status("SLEEPING") signal.signal(signal.SIGTSTP, _task_sig_dumb) os.kill(os.getpid(), signal.SIGSTOP) time.sleep(1) if task_read_status() == 'NOW STOP': if can_stop_too: write_message("stopped") task_update_status("STOPPED") sys.exit(0) else: write_message("stopping as soon as possible...") task_update_status('ABOUT TO STOP') else: write_message("... continuing...") task_update_status("CONTINUING") signal.signal(signal.SIGTSTP, _task_sig_sleep) elif status == 'ABOUT TO STOP' and can_stop_too: write_message("stopped") task_update_status("STOPPED") sys.exit(0) if can_stop_too: runtime_limit = task_get_option("limit") if runtime_limit is not None: if not (runtime_limit[0] <= time.time() <= runtime_limit[1]): write_message("stopped (outside runtime limit)") task_update_status("STOPPED") sys.exit(0) def authenticate(user, authorization_action, authorization_msg=""): """Authenticate the user against the user database. Check for its password, if it exists. Check for authorization_action access rights. 
Return user name upon authorization success, do system exit upon authorization failure. """ # With SSO it's impossible to check for pwd if CFG_EXTERNAL_AUTH_USING_SSO or os.path.basename(sys.argv[0]) in CFG_VALID_PROCESSES_NO_AUTH_NEEDED: return user if authorization_msg: print authorization_msg print "=" * len(authorization_msg) if user == "": print >> sys.stdout, "\rUsername: ", try: user = sys.stdin.readline().lower().strip() except EOFError: sys.stderr.write("\n") sys.exit(1) except KeyboardInterrupt: sys.stderr.write("\n") sys.exit(1) else: print >> sys.stdout, "\rUsername:", user ## first check user: # p_un passed may be an email or a nickname: res = run_sql("select id from user where email=%s", (user,), 1) + \ run_sql("select id from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: uid = res[0][0] ok = False login_method = get_user_preferences(uid)['login_method'] if not CFG_EXTERNAL_AUTHENTICATION[login_method]: #Local authentication, let's see if we want passwords. res = run_sql("select id from user where id=%s " "and password=AES_ENCRYPT(email,'')", (uid,), 1) if res: ok = True if not ok: try: password_entered = getpass.getpass() except EOFError: sys.stderr.write("\n") sys.exit(1) except KeyboardInterrupt: sys.stderr.write("\n") sys.exit(1) if not CFG_EXTERNAL_AUTHENTICATION[login_method]: res = run_sql("select id from user where id=%s " "and password=AES_ENCRYPT(email, %s)", (uid, password_entered), 1) if res: ok = True else: if CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(get_email(uid), password_entered): ok = True if not ok: print "Sorry, wrong credentials for %s." % user sys.exit(1) else: ## secondly check authorization for the authorization_action: (auth_code, auth_message) = acc_authorize_action(uid, authorization_action) if auth_code != 0: print auth_message sys.exit(1) return user def _task_submit(argv, authorization_action, authorization_msg): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" ## check as whom we want to submit? check_running_process_user() ## sanity check: remove eventual "task" option: ## authenticate user: _TASK_PARAMS['user'] = authenticate(_TASK_PARAMS["user"], authorization_action, authorization_msg) ## submit task: if _TASK_PARAMS['task_specific_name']: task_name = '%s:%s' % (_TASK_PARAMS['task_name'], _TASK_PARAMS['task_specific_name']) else: task_name = _TASK_PARAMS['task_name'] write_message("storing task options %s\n" % argv, verbose=9) verbose_argv = 'Will execute: %s' % ' '.join([escape_shell_arg(str(arg)) for arg in argv]) _TASK_PARAMS['task_id'] = run_sql("""INSERT INTO schTASK (proc,user, runtime,sleeptime,status,progress,arguments,priority,sequenceid) VALUES (%s,%s,%s,%s,'WAITING',%s,%s,%s,%s)""", (task_name, _TASK_PARAMS['user'], _TASK_PARAMS["runtime"], _TASK_PARAMS["sleeptime"], verbose_argv, marshal.dumps(argv), _TASK_PARAMS['priority'], _TASK_PARAMS['sequence-id'])) ## update task number: write_message("Task #%d submitted." % _TASK_PARAMS['task_id']) return _TASK_PARAMS['task_id'] def _task_get_options(task_id, task_name): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc LIKE %s", (task_id, task_name+'%')) try: out = marshal.loads(res[0][0]) except: write_message("Error: %s task %d does not seem to exist." 
\ % (task_name, task_id), sys.stderr) task_update_status('ERROR') sys.exit(1) write_message('Options retrieved: %s' % (out, ), verbose=9) return out def _task_run(task_run_fnc): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call. The task prints Fibonacci numbers for up to NUM on the stdout, and some messages on stderr. @param task_run_fnc: will be called as the main core function. Must return False in case of errors. Return True in case of success and False in case of failure.""" from invenio.bibtasklet import _TASKLETS ## We prepare the pid file inside /prefix/var/run/taskname_id.pid check_running_process_user() try: pidfile_name = os.path.join(CFG_PREFIX, 'var', 'run', 'bibsched_task_%d.pid' % _TASK_PARAMS['task_id']) pidfile = open(pidfile_name, 'w') pidfile.write(str(os.getpid())) pidfile.close() except OSError: register_exception(alert_admin=True) task_update_status("ERROR") return False ## check task status: task_status = task_read_status() if task_status not in ("WAITING", "SCHEDULED"): write_message("Error: The task #%d is %s. I expected WAITING or SCHEDULED." % (_TASK_PARAMS['task_id'], task_status), sys.stderr) return False time_now = time.time() if _TASK_PARAMS['runtime_limit'] is not None and os.environ.get('BIBSCHED_MODE', 'manual') != 'manual': if not _TASK_PARAMS['runtime_limit'][0][0] <= time_now <= _TASK_PARAMS['runtime_limit'][0][1]: if time_now <= _TASK_PARAMS['runtime_limit'][0][0]: new_runtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(_TASK_PARAMS['runtime_limit'][0][0])) else: new_runtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(_TASK_PARAMS['runtime_limit'][1][0])) progress = run_sql("SELECT progress FROM schTASK WHERE id=%s", (_TASK_PARAMS['task_id'], )) if progress: progress = progress[0][0] else: progress = '' g = re.match(r'Postponed (\d+) time\(s\)', progress) if g: postponed_times = int(g.group(1)) else: postponed_times = 0 - run_sql("UPDATE schTASK SET runtime=%s, status='WAITING', progress=%s, host='' WHERE id=%s", (new_runtime, 'Postponed %d time(s)' % (postponed_times + 1), _TASK_PARAMS['task_id'])) + if _TASK_PARAMS['sequence-id']: + ## Also postponing other dependent tasks. + run_sql("UPDATE schTASK SET runtime=%s, progress=%s WHERE sequenceid=%s AND status='WAITING'", (new_runtime, 'Postponed as task %s' % _TASK_PARAMS['task_id'], _TASK_PARAMS['sequence-id'])) # kwalitee: disable=sql + run_sql("UPDATE schTASK SET runtime=%s, status='WAITING', progress=%s, host='' WHERE id=%s", (new_runtime, 'Postponed %d time(s)' % (postponed_times + 1), _TASK_PARAMS['task_id'])) # kwalitee: disable=sql write_message("Task #%d postponed because outside of runtime limit" % _TASK_PARAMS['task_id']) return True ## initialize signal handler: signal.signal(signal.SIGUSR2, signal.SIG_IGN) signal.signal(signal.SIGTSTP, _task_sig_sleep) signal.signal(signal.SIGTERM, _task_sig_stop) signal.signal(signal.SIGQUIT, _task_sig_stop) signal.signal(signal.SIGABRT, _task_sig_suicide) signal.signal(signal.SIGINT, _task_sig_stop) ## we can run the task now: write_message("Task #%d started." 
% _TASK_PARAMS['task_id']) task_update_status("RUNNING") ## run the task: _TASK_PARAMS['task_starting_time'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) sleeptime = _TASK_PARAMS['sleeptime'] try: try: if callable(task_run_fnc) and task_run_fnc(): task_update_status("DONE") else: task_update_status("DONE WITH ERRORS") except SystemExit: pass except: write_message(traceback.format_exc()[:-1]) register_exception(alert_admin=True) if task_get_task_param('stop_queue_on_error'): task_update_status("ERROR") else: task_update_status("CERROR") finally: task_status = task_read_status() if sleeptime: argv = _task_get_options(_TASK_PARAMS['task_id'], _TASK_PARAMS['task_name']) verbose_argv = 'Will execute: %s' % ' '.join([escape_shell_arg(str(arg)) for arg in argv]) # Here we check if the task can shift away of has to be run at # a fixed time if task_get_task_param('fixed_time') or _TASK_PARAMS['task_name'] in CFG_BIBTASK_FIXEDTIMETASKS: old_runtime = run_sql("SELECT runtime FROM schTASK WHERE id=%s", (_TASK_PARAMS['task_id'], ))[0][0] else: old_runtime = None new_runtime = get_datetime(sleeptime, now=old_runtime) ## The task is a daemon. We resubmit it if task_status == 'DONE': ## It has finished in a good way. We recycle the database row run_sql("UPDATE schTASK SET runtime=%s, status='WAITING', progress=%s, host='' WHERE id=%s", (new_runtime, verbose_argv, _TASK_PARAMS['task_id'])) write_message("Task #%d finished and resubmitted." % _TASK_PARAMS['task_id']) elif task_status == 'STOPPED': run_sql("UPDATE schTASK SET status='WAITING', progress=%s, host='' WHERE id=%s", (verbose_argv, _TASK_PARAMS['task_id'], )) write_message("Task #%d stopped and resubmitted." % _TASK_PARAMS['task_id']) else: ## We keep the bad result and we resubmit with another id. #res = run_sql('SELECT proc,user,sleeptime,arguments,priority FROM schTASK WHERE id=%s', (_TASK_PARAMS['task_id'], )) #proc, user, sleeptime, arguments, priority = res[0] #run_sql("""INSERT INTO schTASK (proc,user, #runtime,sleeptime,status,arguments,priority) #VALUES (%s,%s,%s,%s,'WAITING',%s, %s)""", #(proc, user, new_runtime, sleeptime, arguments, priority)) write_message("Task #%d finished but not resubmitted. [%s]" % (_TASK_PARAMS['task_id'], task_status)) else: ## we are done: write_message("Task #%d finished. [%s]" % (_TASK_PARAMS['task_id'], task_status)) ## Removing the pid os.remove(pidfile_name) #Lets call the post-process tasklets if task_get_task_param("post-process"): split = re.compile(r"(bst_.*)\[(.*)\]") for tasklet in task_get_task_param("post-process"): if not split.match(tasklet): # wrong syntax _usage(1, "There is an error in the post processing option " "for this task.") aux_tasklet = split.match(tasklet) _TASKLETS[aux_tasklet.group(1)](**eval("dict(%s)" % (aux_tasklet.group(2)))) return True def _usage(exitcode=1, msg="", help_specific_usage="", description=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) if help_specific_usage: sys.stderr.write("Command options:\n") sys.stderr.write(help_specific_usage) sys.stderr.write(" Scheduling options:\n") sys.stderr.write(" -u, --user=USER\tUser name under which to submit this" " task.\n") sys.stderr.write(" -t, --runtime=TIME\tTime to execute the task. [default=now]\n" "\t\t\tExamples: +15s, 5m, 3h, 2002-10-27 13:57:26.\n") sys.stderr.write(" -s, --sleeptime=SLEEP\tSleeping frequency after" " which to repeat the task.\n" "\t\t\tExamples: 30m, 2h, 1d. 
[default=no]\n") sys.stderr.write(" --fixed-time\t\tAvoid drifting of execution time when using --sleeptime\n") sys.stderr.write(" -I, --sequence-id=SEQUENCE-ID\tSequence Id of the current process\n") sys.stderr.write(" -L --limit=LIMIT\tTime limit when it is" " allowed to execute the task.\n" "\t\t\tExamples: 22:00-03:00, Sunday 01:00-05:00.\n" "\t\t\tSyntax: [Wee[kday]] [hh[:mm][-hh[:mm]]].\n") sys.stderr.write(" -P, --priority=PRI\tTask priority (0=default, 1=higher, etc).\n") sys.stderr.write(" -N, --name=NAME\tTask specific name (advanced option).\n\n") sys.stderr.write(" General options:\n") sys.stderr.write(" -h, --help\t\tPrint this help.\n") sys.stderr.write(" -V, --version\t\tPrint version information.\n") sys.stderr.write(" -v, --verbose=LEVEL\tVerbose level (0=min," " 1=default, 9=max).\n") sys.stderr.write(" --profile=STATS\tPrint profile information. STATS is a comma-separated\n\t\t\tlist of desired output stats (calls, cumulative,\n\t\t\tfile, line, module, name, nfl, pcalls, stdname, time).\n") sys.stderr.write(" --stop-on-error\tIn case of unrecoverable error stop the bibsched queue.\n") sys.stderr.write(" --continue-on-error\tIn case of unrecoverable error don't stop the bibsched queue.\n") sys.stderr.write(" --post-process=BIB_TASKLET_NAME[parameters]\tPostprocesses the specified\n\t\t\tbibtasklet with the given parameters between square\n\t\t\tbrackets.\n") sys.stderr.write("\t\t\tExample:--post-process \"bst_send_email[fromaddr=\n\t\t\t'foo@xxx.com', toaddr='bar@xxx.com', subject='hello',\n\t\t\tcontent='help']\"\n") if description: sys.stderr.write(description) sys.exit(exitcode) def _task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" signal.signal(signal.SIGTSTP, signal.SIG_IGN) write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame), verbose=9) write_message("sleeping as soon as possible...") _db_login(relogin=1) task_update_status("ABOUT TO SLEEP") def _task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame), verbose=9) write_message("stopping as soon as possible...") _db_login(relogin=1) # To avoid concurrency with an interrupted run_sql call task_update_status("ABOUT TO STOP") def _task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame), verbose=9) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") _db_login(relogin=1) task_update_status("SUICIDED") sys.exit(1) def _task_sig_dumb(sig, frame): """Dumb signal handler.""" pass _RE_PSLINE = re.compile('^\s*(\w+)\s+(\w+)') def guess_apache_process_user_from_ps(): """Guess Apache process user by parsing the list of running processes.""" apache_users = [] try: # Tested on Linux, Sun and MacOS X for line in os.popen('ps -A -o user,comm').readlines(): g = _RE_PSLINE.match(line) if g: username = g.group(1) process = os.path.basename(g.group(2)) if process in ('apache', 'apache2', 'httpd') : if username not in apache_users and username != 'root': apache_users.append(username) except Exception, e: print >> sys.stderr, "WARNING: %s" % e return tuple(apache_users) def guess_apache_process_user(): """ Return the possible name of the user running the Apache server process. (Look at running OS processes or look at OS users defined in /etc/passwd.) 
""" apache_users = guess_apache_process_user_from_ps() + ('apache2', 'apache', 'www-data') for username in apache_users: try: userline = pwd.getpwnam(username) return userline[0] except KeyError: pass print >> sys.stderr, "ERROR: Cannot detect Apache server process user. Please set the correct value in CFG_BIBSCHED_PROCESS_USER." sys.exit(1) def check_running_process_user(): """ Check that the user running this program is the same as the user configured in CFG_BIBSCHED_PROCESS_USER or as the user running the Apache webserver process. """ running_as_user = pwd.getpwuid(os.getuid())[0] if CFG_BIBSCHED_PROCESS_USER: # We have the expected bibsched process user defined in config, # so check against her, not against Apache. if running_as_user != CFG_BIBSCHED_PROCESS_USER: print >> sys.stderr, """ERROR: You must run "%(x_proc)s" as the user set up in your CFG_BIBSCHED_PROCESS_USER (seems to be "%(x_user)s"). You may want to do "sudo -u %(x_user)s %(x_proc)s ..." to do so. If you think this is not right, please set CFG_BIBSCHED_PROCESS_USER appropriately and rerun "inveniocfg --update-config-py".""" % \ {'x_proc': os.path.basename(sys.argv[0]), 'x_user': CFG_BIBSCHED_PROCESS_USER} sys.exit(1) elif running_as_user != guess_apache_process_user(): # not defined in config, check against Apache print >> sys.stderr, """ERROR: You must run "%(x_proc)s" as the same user that runs your Apache server process (seems to be "%(x_user)s"). You may want to do "sudo -u %(x_user)s %(x_proc)s ..." to do so. If you think this is not right, please set CFG_BIBSCHED_PROCESS_USER appropriately and rerun "inveniocfg --update-config-py".""" % \ {'x_proc': os.path.basename(sys.argv[0]), 'x_user': guess_apache_process_user()} sys.exit(1) return diff --git a/modules/bibsword/lib/bibsword_client_http.py b/modules/bibsword/lib/bibsword_client_http.py index ceeef5fb5..b3cd79795 100644 --- a/modules/bibsword/lib/bibsword_client_http.py +++ b/modules/bibsword/lib/bibsword_client_http.py @@ -1,188 +1,188 @@ ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' BibSWORD Client Http Queries ''' import urllib2 from tempfile import NamedTemporaryFile from invenio.config import CFG_TMPDIR -from invenio.bibsword_config import CFG_DEFAULT_USER_AGENT +from invenio.urlutils import make_user_agent_string class RemoteSwordServer: '''This class gives every tools to communicate with the SWORD/APP deposit of ArXiv. ''' # static variable used to properly perform http request - agent = CFG_DEFAULT_USER_AGENT + agent = make_user_agent_string("BibSWORD") def __init__(self, authentication_infos): ''' This method the constructor of the class, it initialise the connection using a passord. That allows users to connect with auto-authentication. 
@param self: reference to the current instance of the class @param authentication_infos: dictionary with authentication infos containing keys: - realm: realm of the server - hostname: hostname of the server - username: name of an arxiv known user - password: password of the known user ''' #password manager with default realm to avoid looking for it passman = urllib2.HTTPPasswordMgrWithDefaultRealm() passman.add_password(authentication_infos['realm'], authentication_infos['hostname'], authentication_infos['username'], authentication_infos['password']) #create an authentificaiton handler authhandler = urllib2.HTTPBasicAuthHandler(passman) http_handler = urllib2.HTTPHandler(debuglevel=0) opener = urllib2.build_opener(authhandler, http_handler) # insalling : every call to opener will user the same user/pass urllib2.install_opener(opener) def get_remote_collection(self, url): ''' This method sent a request to the servicedocument to know the collections offer by arxives. @param self: reference to the current instance of the class @param url: the url where the request is made @return: (xml file) collection of arxiv allowed for the user ''' #format the request request = urllib2.Request(url) #launch request #try: response = urllib2.urlopen(request) #except urllib2.HTTPError: # return '' #except urllib2.URLError: # return '' return response.read() def deposit_media(self, media, collection, onbehalf): ''' This method allow the deposit of any type of media on a given arxiv collection. @param self: reference to the current instanc off the class @param media: dict of file info {'type', 'size', 'file'} @param collection: abreviation of the collection where to deposit @param onbehalf: user that make the deposition @return: (xml file) contains error ot the url of the temp file ''' #format the final deposit URL deposit_url = collection #prepare the header headers = {} headers['Content-Type'] = media['type'] headers['Content-Length'] = media['size'] #if on behalf, add to the header if onbehalf != '': headers['X-On-Behalf-Of'] = onbehalf headers['X-No-Op'] = 'True' headers['X-Verbose'] = 'True' - headers['User-Agent'] = CFG_DEFAULT_USER_AGENT + headers['User-Agent'] = self.agent #format the request result = urllib2.Request(deposit_url, media['file'], headers) #launch request try: return urllib2.urlopen(result).read() except urllib2.HTTPError: return '' def metadata_submission(self, deposit_url, metadata, onbehalf): ''' This method send the metadata to ArXiv, then return the answere @param metadata: xml file to submit to ArXiv @param onbehalf: specify the persone (and email) to informe of the publication ''' #prepare the header of the request headers = {} headers['Host'] = 'arxiv.org' - headers['User-Agent'] = CFG_DEFAULT_USER_AGENT + headers['User-Agent'] = self.agent headers['Content-Type'] = 'application/atom+xml;type=entry' #if on behalf, add to the header if onbehalf != '': headers['X-On-Behalf-Of'] = onbehalf headers['X-No-Op'] = 'True' headers['X-verbose'] = 'True' #format the request result = urllib2.Request(deposit_url, metadata, headers) #launch request try: response = urllib2.urlopen(result).read() except urllib2.HTTPError, e: tmpfd = NamedTemporaryFile(mode='w', suffix='.xml', prefix='bibsword_error_', dir=CFG_TMPDIR, delete=False) tmpfd.write(e.read()) tmpfd.close() return '' except urllib2.URLError: return '' return response def get_submission_status(self, status_url) : ''' This method get the xml file from the given URL and return it @param status_url: url where to get the status @return: xml 
atom entry containing the status ''' #format the http request request = urllib2.Request(status_url) request.add_header('Host', 'arxiv.org') - request.add_header('User-Agent', CFG_DEFAULT_USER_AGENT) + request.add_header('User-Agent', self.agent) #launch request try: response = urllib2.urlopen(request).read() except urllib2.HTTPError: return 'HTTPError (Might be an authentication issue)' except urllib2.URLError: return 'Wrong url' return response diff --git a/modules/bibsword/lib/bibsword_config.py b/modules/bibsword/lib/bibsword_config.py index 7e90d9d4f..5d90b7b44 100644 --- a/modules/bibsword/lib/bibsword_config.py +++ b/modules/bibsword/lib/bibsword_config.py @@ -1,146 +1,143 @@ ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ''' Forward to ArXiv.org source code ''' from invenio.bibformat_dblayer import get_tag_from_name #Maximal time to keep the stored XML Service doucment before reloading it in sec CFG_BIBSWORD_SERVICEDOCUMENT_UPDATE_TIME = 3600 -#Default user agent for invenio sword-client -CFG_DEFAULT_USER_AGENT = 'Invenio SWORD client' - #Default submission status CFG_SUBMISSION_STATUS_SUBMITTED = "submitted" CFG_SUBMISSION_STATUS_PUBLISHED = "published" CFG_SUBMISSION_STATUS_ONHOLD = "onhold" CFG_SUBMISSION_STATUS_REMOVED = "removed" CFG_SUBMIT_ARXIV_INFO_MESSAGE = "Submitted from Invenio to arXiv by %s, on %s, as %s" CFG_DOCTYPE_UPLOAD_COLLECTION = 'PUSHED_TO_ARXIV' # report number: marc_tag_main_report_number = get_tag_from_name('primary report number') if marc_tag_main_report_number: CFG_MARC_REPORT_NUMBER = marc_tag_main_report_number else: CFG_MARC_REPORT_NUMBER = '037__a' # title: marc_tag_title = get_tag_from_name('title') if marc_tag_title: CFG_MARC_TITLE = marc_tag_title else: CFG_MARC_TITLE = '245__a' # author name: marc_tag_author = get_tag_from_name('first author name') if marc_tag_author: CFG_MARC_AUTHOR_NAME = marc_tag_author else: CFG_MARC_AUTHOR_NAME = '100__a' # author affiliation marc_tag_author_affiliation = get_tag_from_name('first author affiliation') if marc_tag_author_affiliation: CFG_MARC_AUTHOR_AFFILIATION = marc_tag_author_affiliation else: CFG_MARC_AUTHOR_AFFILIATION = '100__u' # contributor name: marc_tag_contributor_name = get_tag_from_name('additional author name') if marc_tag_contributor_name: CFG_MARC_CONTRIBUTOR_NAME = marc_tag_contributor_name else: CFG_MARC_CONTRIBUTOR_NAME = '700_a' # contributor affiliation: marc_tag_contributor_affiliation = get_tag_from_name('additional author affiliation') if marc_tag_contributor_affiliation: CFG_MARC_CONTRIBUTOR_AFFILIATION = marc_tag_contributor_affiliation else: CFG_MARC_CONTRIBUTOR_AFFILIATION = '700_u' # abstract: marc_tag_abstract = get_tag_from_name('main abstract') if marc_tag_abstract: CFG_MARC_ABSTRACT = marc_tag_abstract else: CFG_MARC_ABSTRACT = '520__a' # additional report number 
marc_tag_additional_report_number = get_tag_from_name('additional report number') if marc_tag_additional_report_number: CFG_MARC_ADDITIONAL_REPORT_NUMBER = marc_tag_additional_report_number else: CFG_MARC_ADDITIONAL_REPORT_NUMBER = '088__a' # doi marc_tag_doi = get_tag_from_name('doi') if marc_tag_doi: CFG_MARC_DOI = marc_tag_doi else: CFG_MARC_DOI = '909C4a' # journal code marc_tag_journal_ref_code = get_tag_from_name('journal code') if marc_tag_journal_ref_code: CFG_MARC_JOURNAL_REF_CODE = marc_tag_journal_ref_code else: CFG_MARC_JOURNAL_REF_CODE = '909C4c' # journal reference title marc_tag_journal_ref_title = get_tag_from_name('journal title') if marc_tag_journal_ref_title: CFG_MARC_JOURNAL_REF_TITLE = marc_tag_journal_ref_title else: CFG_MARC_JOURNAL_REF_TITLE = '909C4p' # journal reference page marc_tag_journal_ref_page = get_tag_from_name('journal page') if marc_tag_journal_ref_page: CFG_MARC_JOURNAL_REF_PAGE = marc_tag_journal_ref_page else: CFG_MARC_JOURNAL_REF_PAGE = '909C4v' # journal reference year marc_tag_journal_ref_year = get_tag_from_name('journal year') if marc_tag_journal_ref_year: CFG_MARC_JOURNAL_REF_YEAR = marc_tag_journal_ref_year else: CFG_MARC_JOURNAL_REF_YEAR = '909C4y' # comment marc_tag_comment = get_tag_from_name('comment') if marc_tag_comment: CFG_MARC_COMMENT = marc_tag_comment else: CFG_MARC_COMMENT = '500__a' # internal note field marc_tag_internal_note = get_tag_from_name('internal notes') if marc_tag_internal_note: CFG_MARC_RECORD_SUBMIT_INFO = marc_tag_internal_note else: CFG_MARC_RECORD_SUBMIT_INFO = '595__a' diff --git a/modules/bibupload/doc/admin/bibupload-admin-guide.webdoc b/modules/bibupload/doc/admin/bibupload-admin-guide.webdoc index b864c0d92..4bf779b3a 100644 --- a/modules/bibupload/doc/admin/bibupload-admin-guide.webdoc +++ b/modules/bibupload/doc/admin/bibupload-admin-guide.webdoc @@ -1,557 +1,613 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

Contents

1. Overview
2. Configuring BibUpload
3. Running BibUpload
       3.1. Inserting new records
       3.2. Inserting records into the Holding Pen
       3.3. Updating existing records
       3.4. Inserting and updating at the same time
       3.5. Updating preformatted output formats
       3.6. Uploading fulltext files
       3.7. Obtaining feedback
4. Batch Uploader
       4.1. Web interface - Cataloguers
       4.2. Web interface - Robots
       4.3. Daemon mode

1. Overview

BibUpload enables you to upload bibliographic data in MARCXML format into the Invenio bibliographic database. It is also used internally by other Invenio modules as the sole entry point of metadata into the bibliographic database.

Note that before uploading a MARCXML file, you may want to run the provided /opt/invenio/bin/xmlmarclint on it in order to verify its correctness.
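
For example, a quick check of a file before uploading (file.xml being the file in question):

 $ /opt/invenio/bin/xmlmarclint file.xml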

2. Configuring BibUpload

BibUpload takes a MARCXML file as its input. There is nothing to be configured for these files. If the files have to be converted into MARCXML from some other format, structured or not, this is usually done beforehand via the BibConvert module.

Note that if you are using external system numbers for your records, such as when your records are being synchronized from an external system, then BibUpload knows about the tag 970 as the one containing the external system number. (To change this 970 tag into something else, you would have to edit the BibUpload config source file.)

Note also that, in a similar way, BibUpload knows about OAI identifiers, so that it will refuse, for example, to insert the same OAI-harvested record twice.

3. Running BibUpload

3.1 Inserting new records

Consider that you have a MARCXML file containing new records that is to be uploaded into Invenio. (For example, it might have been produced by BibConvert.) To perform the upload, you would call the BibUpload script in insert mode as follows:

 $ bibupload -i file.xml
 
 
In insert mode, all the records from the file will be treated as new. This means that they should contain neither 001 tags (holding record IDs) nor 970 tags (holding external system numbers). BibUpload would refuse to upload records having these tags, in order to prevent potential double uploading. If your file does contain 001 or 970 tags, then chances are that you want to update existing records, not re-upload them as new, so BibUpload will warn you about this and refuse to continue.

For example, to insert a new record, your file should look like this:

     <record>
         <datafield tag="100" ind1=" " ind2=" ">
             <subfield code="a">Doe, John</subfield>
         </datafield>
         <datafield tag="245" ind1=" " ind2=" ">
             <subfield code="a">On The Foo And Bar</subfield>
         </datafield>
     </record>
 

3.2 Inserting records into the Holding Pen

A special mode of BibUpload that is tightly connected with BibEdit is the Holding Pen mode.

When you insert a record using the holding pen mode such as in the following example:

 $ bibupload -o file.xml
 
the records are not actually integrated into the database, but are instead put into an intermediate space called the Holding Pen, where authorized curators can review them, manipulate them and eventually approve them.

The holding pen is integrated with BibEdit.

3.3 Updating existing records

When you want to update existing records with the new content from your input MARCXML file, your input file should contain either tag 001 (holding record IDs) or tag 970 (holding external system numbers). BibUpload will try to match existing records via 001 and 970, and if it finds a record in the database that corresponds to a record from the file, it will update its content. Otherwise it will signal an error saying that it could not find the record to be updated.

For example, to update the title of record #123 via the correct mode, your input file should contain the record ID in the 001 tag and the title in the 245 tag, as follows:

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="245" ind1=" " ind2=" ">
             <subfield code="a">My Newly Updated Title</subfield>
         </datafield>
     </record>
 

There are several updating modes:

 
     -r, --replace Replace existing records by those from the XML
                   MARC file.  The original content is wiped out
                   and fully replaced.  Signals error if record
                   is not found via matching record IDs or system
                   numbers.
                   Fields defined in Invenio config variable
                   CFG_BIBUPLOAD_STRONG_TAGS are not replaced.
 
                   Note also that `-r' can be combined with `-i'
                   into an `-ir' option that would automatically
                   either insert records as new if they are not
                   found in the system, or correct existing
                   records if they are found to exist.
 
     -a, --append  Append fields from XML MARC file at the end of
                   existing records.  The original content is
                   enriched only.  Signals error if record is not
                   found via matching record IDs or system
                   numbers.
 
     -c, --correct Correct fields of existing records by those
                   from XML MARC file.  The original record
                   content is modified only on those fields from
                   the XML MARC file where both the tags and the
                   indicators match: the original fields are
                   removed and replaced by those from the XML
                   MARC file.  Fields not present in XML MARC
                   file are not changed (unlike the -r option).
                   Fields with "provenance" subfields defined in
                   'CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS'
                   are protected against deletion unless the
                   input MARCXML contains a matching
                   provenance value.
                   Signals error if record is not found via
                   matching record IDs or system numbers.
 
     -d, --delete  Delete fields of existing records that are
                   contained in the XML MARC file. The fields in
                   the original record that are not present in
                   the XML MARC file are preserved.
                   This is incompatible with FFT (see below).
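
For example, a minimal sketch of an append-mode run that adds an extra author field to record #123 (the field choice is purely illustrative):

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="700" ind1=" " ind2=" ">
             <subfield code="a">Doe, Jane</subfield>
         </datafield>
     </record>
 
 $ bibupload -a file.xml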
 

Note that if you are using the --replace mode and you specify in the incoming MARCXML a 001 tag with a value representing a record ID that does not exist, bibupload will not create the record on the fly unless the --force parameter was also passed on the command line. This is done to avoid creating, by mistake, holes in the database list of record identifiers: if you --replace a non-existing record, imposing a record ID with a value of, say, 1 000 000, and subsequently --insert a new record, the latter will automatically receive an ID with the value 1 000 001.

If you combine the --pretend parameter with the above updating modes you can actually test what would be executed without modifying the database or altering the system status.
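
For instance, a dry-run sketch (assuming file.xml holds corrections for existing records):

 $ bibupload -c --pretend file.xml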

3.4 Inserting and updating at the same time

Note that the insert/update modes can be combined together. For example, if you have a file that contains a mixture of new records with possibly some records to be updated, then you can run:

 $ bibupload -i -r file.xml
 
 
In this case BibUpload will try to do an update (for records having either 001 or 970 identifiers), or an insert (for the other ones).

3.5 Updating preformatted output formats

BibFormat can use this special upload mode, during which the metadata of the records is not updated, only their preformatted output formats:

     -f, --format        Upload only the format (FMT) fields.
                         The original content is not changed, and neither is its modification date.
 
This is useful for the bibreformat daemon only; human administrators don't need to know about this mode explicitly.

3.6 Uploading fulltext files

The fulltext files can be uploaded and revised via a special FFT ("fulltext file transfer") tag with the following semantics:

     FFT $a  ...  location of the docfile to upload (a filesystem path or a URL)
         $d  ...  docfile description (optional)
         $f  ...  format (optional; if not set, deduced from $a)
         $m  ...  new desired docfile name (optional; used for renaming files)
         $n  ...  docfile name (optional; if not set, deduced from $a)
         $o  ...  flag (repeatable subfield)
         $r  ...  restriction (optional, see below)
         $t  ...  docfile type (e.g. Main, Additional)
         $v  ...  version (used only with REVERT and DELETE-FILE, see below)
         $x  ...  url/path for an icon (optional)
         $z  ...  comment (optional)
 

For example, to upload a new fulltext file thesis.pdf associated to record ID 123:

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="a">/tmp/thesis.pdf</subfield>
             <subfield code="t">Main</subfield>
             <subfield code="d">
               This is the fulltext version of my thesis in the PDF format.
               Chapter 5 still needs some revision.
             </subfield>
         </datafield>
     </record>
 

The FFT tag is repeatable, so one can pass along another FFT tag instance containing a pointer to, e.g., the thesis defence slides. The subfields of an FFT tag are non-repeatable.

When more than one FFT tag is specified for the same document (e.g. for adding more than one format at a time), if $t (docfile type), $m (new desired docfile name), $r (restriction), $v (version), $x (url/path for an icon), are specified, they should be identically specified for each single entry of FFT. E.g. if you want to specify an icon for a document with two formats (say .pdf and .doc), you'll write two FFT tags, both containing the same $x subfield.
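
For instance, a minimal sketch (paths purely illustrative) of two FFT tags attaching the .pdf and .doc formats of the same document, both carrying the same $x icon:

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="a">/tmp/thesis.pdf</subfield>
             <subfield code="t">Main</subfield>
             <subfield code="x">/tmp/icon.gif</subfield>
         </datafield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="a">/tmp/thesis.doc</subfield>
             <subfield code="t">Main</subfield>
             <subfield code="x">/tmp/icon.gif</subfield>
         </datafield>
     </record>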

The bibupload process, when it encounters FFT tags, will automatically populate fulltext storage space (/opt/invenio/var/data/files) and metadata record associated tables (bibrec_bibdoc, bibdoc) as appropriate. It will also enrich the 856 tags (URL tags) of the MARC metadata of the record in question with references to the latest versions of each file.

Note that for $a and $x subfields filesystem paths must be absolute (e.g. /tmp/icon.gif is valid, while Destkop/icon.gif is not) and they must be readable by the user/group of the bibupload process that will handle the FFT.

The bibupload process supports the usual modes correct, append, replace, insert with a semantic that is somewhat similar to the semantic of the metadata upload:

 Mode                     Metadata                                        Fulltext
 objects being uploaded   MARC field instances characterized by           fulltext files characterized by unique
                          tags (010-999)                                  file names (FFT $n)
 insert                   insert new record; must not exist               insert new files; must not exist
 append                   append new tag instances for the given tag      append new files, if filename (i.e. new
                          XXX, regardless of existing tag instances       format) not already present
 correct                  correct tag instances for the given tag XXX;    correct files with the given filename;
                          delete existing ones and replace with given     add new revision or delete file; if the
                          ones                                            docname does not exist the file is added
 replace                  replace all tags, whatever XXX are              replace all files, whatever filenames are
 delete                   delete all existing tag instances               not supported

Note that in append and insert mode, $m is ignored.

In order to rename a document, just use the correct mode, specifying in the $n subfield the original docname that should be renamed and in $m the new name.
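
For example, a sketch (docnames purely illustrative) renaming the document thesis to phd-thesis on record #123, to be uploaded in correct mode ($ bibupload -c file.xml):

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="n">thesis</subfield>
             <subfield code="m">phd-thesis</subfield>
         </datafield>
     </record>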

Special values can be assigned to the $t subfield.

 Value        Meaning
 PURGE        In order to purge previous file revisions (i.e. in order to keep only the latest file version), please use the correct mode with $n docname and $t PURGE as the special keyword.
 DELETE       In order to delete all existing versions of a file, making it effectively hidden, please use the correct mode with $n docname and $t DELETE as the special keyword.
 EXPUNGE      In order to expunge (i.e. remove completely, also from the filesystem) all existing versions of a file, making it effectively disappear, please use the correct mode with $n docname and $t EXPUNGE as the special keyword.
 FIX-MARC     In order to synchronize MARC to the bibrec/bibdoc structure (e.g. after an update or a tweak in the database), please use the correct mode with $n docname and $t FIX-MARC as the special keyword.
 FIX-ALL      In order to fix a record (i.e. put all its linked documents in a coherent state) and synchronize the MARC to the table, please use the correct mode with $n docname and $t FIX-ALL as the special keyword.
 REVERT       In order to revert to a previous file revision (i.e. to create a new revision with the same content as some previous revision had), please use the correct mode with $n docname, $t REVERT as the special keyword and $v the number corresponding to the desired version.
 DELETE-FILE  In order to delete a particular file added by mistake, please use the correct mode with $n docname, $t DELETE-FILE, specifying $v version and $f format. Note that this operation is not reversible. Note that if you don't specify a version, the last version will be used.

In order to preserve previous comments and descriptions when correcting, please use the KEEP-OLD-VALUE special keyword with the desired $d and $z subfield.
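
For instance, a sketch (path and docname purely illustrative) of a correct-mode revision that uploads a new version of the document thesis while preserving its previous description and comment:

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="a">/tmp/thesis-v2.pdf</subfield>
             <subfield code="n">thesis</subfield>
             <subfield code="d">KEEP-OLD-VALUE</subfield>
             <subfield code="z">KEEP-OLD-VALUE</subfield>
         </datafield>
     </record>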

The $r subfield can contain a string that can be used to restrict the given document. The same value must be specified for all the formats of a given document. By default the keyword will be used as the status parameter for the "viewrestrdoc" action, which can be used to give access rights/restrictions to the desired users. E.g. if you set the keyword "thesis", you can then connect the "thesisviewer" role to the action "viewrestrdoc" with the parameter "status" set to "thesis". Then all the users linked with the "thesisviewer" role will be able to download the document, while any other users who are not considered authors of the given record will not be allowed. Note that if you use the keyword "KEEP-OLD-VALUE", the previous restrictions, if applicable, will be kept.

More advanced document-level restrictions are indeed possible. In fact, if the value contains:

  • email: john.doe@example.org: then only the user having john.doe@example.org as email address will be authorized to access the given document.
  • group: example: then only users belonging to the local/external group example will be authorized to access the given document.
  • role: example: then only the users belonging to the WebAccess role example will be authorized to access the given document.
  • firerole: allow .../deny...: then only the users implicitly matched by the given firewall-like role definition will be authorized to access the given document.
  • status: example: then only the users belonging to roles having an authorization for the WebAccess action viewrestrdoc with parameter status set to example will be authorized (that is exactly like setting $r to example).
Note that authors (as defined in the record MARC) and superadmins are always authorized to access a document, no matter the given value of the status.
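
For example, a sketch (path and address purely illustrative) of an upload that restricts a new file to a single user via the email rule described above:

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="a">/tmp/thesis.pdf</subfield>
             <subfield code="t">Main</subfield>
             <subfield code="r">email: john.doe@example.org</subfield>
         </datafield>
     </record>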

Some special flags might be set via FFT and associated with the current document by using the $o subfield. This feature is experimental. Currently only two flags are actively considered:

  • HIDDEN: used to specify that the file that is currently added (via revision or append) must be hidden, i.e. must not be visible to the world but only known by the system (e.g. to allow for fulltext indexing). This flag is permanently associated with the specific revision and format of the file being added.
  • PERFORM_HIDE_PREVIOUS: used to specify that, although the current file should be visible (unless the HIDDEN flag is also specified), any other previous revision of the document should receive the HIDDEN flag, and should thus be hidden to the world.
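
For example, a sketch (path purely illustrative) adding a file with the HIDDEN flag, so that it is known to the system (e.g. for fulltext indexing) but not visible to the world:

     <record>
         <controlfield tag="001">123</controlfield>
         <datafield tag="FFT" ind1=" " ind2=" ">
             <subfield code="a">/tmp/thesis-sources.tex</subfield>
             <subfield code="t">Main</subfield>
             <subfield code="o">HIDDEN</subfield>
         </datafield>
     </record>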

Note that each time bibupload is called on a record, the 8564 tags pointing to locally stored files are recreated on the basis of the full-text files connected to the record. Thus, if you wish to update some 8564 tag pointing to a locally managed file, the only way to perform this is through the FFT tag, not by editing 8564 directly.

3.7 Obtaining feedback

Sometimes, to implement a particular workflow or policy in a digital repository, it might be useful to receive automatic, machine-friendly feedback that acknowledges the outcome of a bibupload execution. To this aim, the --callback-url command line parameter can be used. This parameter expects a URL to which a JSON-serialized response will be POSTed.

Say, you have an external service reachable via the URL http://www.example.org/accept_feedback. If the argument:

 --callback-url http://www.example.org/accept_feedback
 
is added to the usual bibupload call, at the end of the execution of the corresponding bibupload task an HTTP POST request will be performed, if possible, to the given URL, reporting the outcome of the bibupload execution as a JSON-serialized response with the following structure:
  • a JSON object with the following string -- value mapping:
    • string: results -- value: a JSON array whose values are all JSON objects with the following string -- value mapping:
      • recid: an integer number, representing the described record identifier (-1 if no record identifier can be retrieved)
      • success: either true or false depending on the success of the elaboration of the corresponding MARCXML
      • error_message: a string containing a human-friendly description of the error that caused the MARCXML elaboration to fail (in case success has the value false)
      • marcxml: in case of success, this contains the final MARCXML representation of the record
      • url: in case of success, this contains the final URL where the detailed representation of the record can be fetched (i.e. its canonical URL)

For example, a possible JSON response posted to a specified URL can look like:

 {
     "results": [
         {
             "recid": -1,
             "error_message": "ERROR: can not retrieve the record identifier",
             "success": false
         },
         {
             "recid": 1000,
             "error_message": "",
             "success": true,
             "marcxml": "1000...",
             "url": "http://www.example.org/record/1000"
         },
         ...
     ]
 }
 

Note that, currently, if the specified URL cannot be reached at the time of the POST request, the whole bibupload task will fail.
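
For local testing, a minimal endpoint accepting such a POST could look like the following sketch (Python 2 standard library only; the port and the printed fields are illustrative, not part of Invenio):

 import json
 from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler

 class FeedbackHandler(BaseHTTPRequestHandler):
     def do_POST(self):
         # Read and decode the JSON payload POSTed by bibupload
         length = int(self.headers.getheader('content-length'))
         payload = json.loads(self.rfile.read(length))
         for result in payload['results']:
             if result['success']:
                 print "uploaded recid %s: %s" % (result['recid'], result['url'])
             else:
                 print "upload failed: %s" % result['error_message']
         self.send_response(200)
         self.end_headers()

 if __name__ == '__main__':
     HTTPServer(('', 8080), FeedbackHandler).serve_forever()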

If you use the same callback URL to receive the feedback from more than one bibupload request, you might want to be able to correctly identify each bibupload call with the corresponding feedback. For this reason you can pass an additional argument to the bibupload call:

 --nonce VALUE

where VALUE can be any string you wish. This string will then be added to the JSON structure, as in (supposing you specified --nonce 1234):

 {
     "nonce": "1234",
     "results": [
         {
             "recid": -1,
             "error_message": "ERROR: can not retrieve the record identifier",
             "success": false
         },
         {
             "recid": 1000,
             "error_message": "",
             "success": true,
             "marcxml": "1000...",
             "url": "http://www.example.org/record/1000"
         },
         ...
     ]
 }
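
For example, combining the callback URL and the nonce in a single bibupload call:

 $ bibupload -i localfile.xml --callback-url http://www.example.org/accept_feedback --nonce 1234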

4. Batch Uploader

4.1 Web interface - Cataloguers

The batchuploader web interface can be used either to upload metadata files or documents. As opposed to daemon mode, actions will be executed only once.

The available upload history displays metadata and document uploads performed via the web interface, not via daemon mode.

4.2 Web interface - Robots

If you need to use the batch upload function from the command line, this can be achieved with a curl call, like:

 $ curl -F 'file=@localfile.xml' -F 'mode=-i' http://cdsweb.cern.ch/batchuploader/robotupload [-F 'callback_url=http://...'] -A invenio_webupload
 
 

This service checks each (client, file) pair to ensure the records are put into a collection the client has rights to.
To configure these permissions, check the CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS variable in the configuration file.
The allowed user agents can also be defined using the CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS variable (a regular expression matched against the client user agent).
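
For instance, a customization in invenio-local.conf might look like the following sketch (IP addresses, collection names and the agent pattern are illustrative):

 CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS = {
     '127.0.0.1': ['*'],                 # wildcard: may upload to any collection
     '192.168.0.42': ['BOOK', 'REPORT'], # may only touch these 980 collection values
 }
 CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS = invenio_webupload|my_robot.*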

Note that you can receive machine-friendly feedback from the bibupload task launched by a given batchuploader request by adding the optional POST field callback_url, which has the same semantics as the --callback-url command line parameter of bibupload (see the previous paragraph Obtaining feedback).

A second, more RESTful interface is also available: it suffices to append the specific mode (among "insert", "append", "correct", "replace") to the URL, as in:

 http://cdsweb.cern.ch/batchuploader/robotupload/insert

The callback_url argument can be put in the query part of the URL, as in:

 http://cdsweb.cern.ch/batchuploader/robotupload/insert?callback_url=http://myhandler

The MARCXML content should then be specified as the body of the request. With curl this can be implemented as in:

 $ curl -T localfile.xml http://cdsweb.cern.ch/batchuploader/robotupload/insert?callback_url=http://... -A invenio_webupload -H "Content-Type: application/marcxml+xml"
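
The same request can also be issued programmatically; here is a minimal Python sketch mirroring the curl call above (URL and callback address are illustrative):

 import urllib2

 marcxml = open('localfile.xml').read()
 req = urllib2.Request('http://cdsweb.cern.ch/batchuploader/robotupload/insert'
                       '?callback_url=http://myhandler', data=marcxml)
 req.add_header('Content-Type', 'application/marcxml+xml')
 req.add_header('User-Agent', 'invenio_webupload')
 # The response body reports the queued task, e.g. "[INFO] bibupload ..."
 print urllib2.urlopen(req).read()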

The nonce argument that can be passed to BibUpload as described in the previous paragraph can also be specified with both robotupload interfaces. E.g.:

 $ curl -F 'file=@localfile.xml' -F 'nonce=1234' -F 'mode=-i' http://cdsweb.cern.ch/batchuploader/robotupload -F 'callback_url=http://...' -A invenio_webupload

and

 $ curl -T localfile.xml 'http://cdsweb.cern.ch/batchuploader/robotupload/insert?nonce=1234&callback_url=http://...' -A invenio_webupload -H "Content-Type: application/marcxml+xml"


4.3 Daemon mode

The batchuploader daemon mode is intended to be a bibsched task for document or metadata upload. The parent directory where the daemon will look for the metadata and documents folders must be specified in the Invenio configuration file (CFG_BATCHUPLOADER_DAEMON_DIR).

An example of how the directories should be arranged, considering that Invenio was installed in /opt/invenio, would be:

 /opt/invenio/var/batchupload
     /opt/invenio/var/batchupload/documents
         /opt/invenio/var/batchupload/documents/append
         /opt/invenio/var/batchupload/documents/revise
     /opt/invenio/var/batchupload/metadata
         /opt/invenio/var/batchupload/metadata/append
         /opt/invenio/var/batchupload/metadata/correct
         /opt/invenio/var/batchupload/metadata/insert
         /opt/invenio/var/batchupload/metadata/replace
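
These folders can be created in one go with, e.g.:

 $ mkdir -p /opt/invenio/var/batchupload/documents/{append,revise}
 $ mkdir -p /opt/invenio/var/batchupload/metadata/{append,correct,insert,replace}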

When running the batchuploader daemon there are two possible execution modes:

         -m,   --metadata    Look for metadata files in folders insert, append, correct and replace.
                             All files are uploaded and then moved to the corresponding DONE folder.
         -d,   --documents   Look for documents in folders append and revise. Uploaded files are then
                             moved to DONE folders if possible.
 
By default, metadata mode is used.

An example of invocation would be:

 $ batchuploader --documents
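
Like other bibsched tasks, batchuploader accepts the standard scheduling options; for instance, a documents run repeated every 24 hours could be queued as (a sketch assuming the standard -s/--sleeptime option):

 $ batchuploader --documents -s 24h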
 
 

It is possible to program batch uploader to run periodically. Read the Howto-run guide to see how. diff --git a/modules/bibupload/lib/Makefile.am b/modules/bibupload/lib/Makefile.am index 46118e4f1..d46d299a0 100644 --- a/modules/bibupload/lib/Makefile.am +++ b/modules/bibupload/lib/Makefile.am @@ -1,30 +1,31 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir = $(libdir)/python/invenio pylib_DATA = bibupload_config.py \ bibupload.py \ bibupload_regression_tests.py \ batchuploader_webinterface.py \ batchuploader_engine.py \ batchuploader_templates.py \ - batchuploader.py + batchuploader.py \ + batchuploader_regression_tests.py EXTRA_DIST = $(pylib_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/bibupload/lib/batchuploader_engine.py b/modules/bibupload/lib/batchuploader_engine.py index cdbc85d9a..29eb119e0 100644 --- a/modules/bibupload/lib/batchuploader_engine.py +++ b/modules/bibupload/lib/batchuploader_engine.py @@ -1,695 +1,711 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Batch Uploader core functions. Uploading metadata and documents. 
""" import os import pwd import grp import sys import time import tempfile import cgi import re import calendar from invenio.dbquery import run_sql, Error from invenio.access_control_engine import acc_authorize_action from invenio.webuser import collect_user_info, page_not_authorized from invenio.config import CFG_BINDIR, CFG_TMPSHAREDDIR, CFG_LOGDIR, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ CFG_OAI_ID_FIELD, CFG_BATCHUPLOADER_DAEMON_DIR, \ CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS, \ - CFG_BATCHUPLOADER_WEB_ROBOT_AGENT, \ + CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS, \ CFG_PREFIX, CFG_SITE_LANG from invenio.textutils import encode_for_xml from invenio.bibtask import task_low_level_submission from invenio.messages import gettext_set_language from invenio.textmarc2xmlmarc import transform_file from invenio.shellutils import run_shell_command from invenio.bibupload import xml_marc_to_records, bibupload import invenio.bibupload as bibupload_module from invenio.bibrecord import create_records, \ record_strip_empty_volatile_subfields, \ record_strip_empty_fields try: from cStringIO import StringIO except ImportError: from StringIO import StringIO PERMITTED_MODES = ['-i', '-r', '-c', '-a', '-ir', '--insert', '--replace', '--correct', '--append'] +_CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS_RE = re.compile(CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS) + def cli_allocate_record(req): req.content_type = "text/plain" req.send_http_header() # check IP and useragent: if not _check_client_ip(req): msg = "[ERROR] Sorry, client IP %s cannot use the service." % _get_client_ip(req) _log(msg) return _write(req, msg) if not _check_client_useragent(req): msg = "[ERROR] Sorry, this useragent cannot use the service." _log(msg) return _write(req, msg) recid = run_sql("insert into bibrec (creation_date,modification_date) values(NOW(),NOW())") return recid -def cli_upload(req, file_content=None, mode=None, callback_url=None): +def cli_upload(req, file_content=None, mode=None, callback_url=None, nonce=None): """ Robot interface for uploading MARC files """ req.content_type = "text/plain" req.send_http_header() # check IP and useragent: if not _check_client_ip(req): msg = "[ERROR] Sorry, client IP %s cannot use the service." % _get_client_ip(req) _log(msg) return _write(req, msg) if not _check_client_useragent(req): - msg = "[ERROR] Sorry, this useragent cannot use the service." + msg = "[ERROR] Sorry, the %s useragent cannot use the service." % _get_useragent(req) _log(msg) return _write(req, msg) - arg_file = file_content arg_mode = mode - if not arg_file: - msg = "[ERROR] Please specify file body to input." - _log(msg) - return _write(req, msg) if not arg_mode: msg = "[ERROR] Please specify upload mode to use." _log(msg) return _write(req, msg) if not arg_mode in PERMITTED_MODES: msg = "[ERROR] Invalid upload mode." _log(msg) return _write(req, msg) - if hasattr(arg_file, "filename"): - arg_file = arg_file.value + + arg_file = file_content + if hasattr(arg_file, 'read'): + ## We've been passed a readable file, e.g. req + arg_file = arg_file.read() + if not arg_file: + msg = "[ERROR] Please provide a body to your request." + _log(msg) + return _write(req, msg) else: - msg = "[ERROR] 'file' parameter must be a (single) file" - _log(msg) - return _write(req, msg) + if not arg_file: + msg = "[ERROR] Please specify file body to input." 
+ _log(msg) + return _write(req, msg) + if hasattr(arg_file, "filename"): + arg_file = arg_file.value + else: + msg = "[ERROR] 'file' parameter must be a (single) file" + _log(msg) + return _write(req, msg) # write temporary file: tempfile.tempdir = CFG_TMPSHAREDDIR filename = tempfile.mktemp(prefix="batchupload_" + \ time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_") filedesc = open(filename, 'w') filedesc.write(arg_file) filedesc.close() # check if this client can run this file: client_ip = _get_client_ip(req) permitted_dbcollids = CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS[client_ip] if permitted_dbcollids != ['*']: # wildcard allow = _check_client_can_submit_file(client_ip, filename, req, 0) if not allow: msg = "[ERROR] Cannot submit such a file from this IP. (Wrong collection.)" _log(msg) return _write(req, msg) # check validity of marcxml xmlmarclint_path = CFG_BINDIR + '/xmlmarclint' xmlmarclint_output, dummy1, dummy2 = run_shell_command('%s %s' % (xmlmarclint_path, filename)) if xmlmarclint_output != 0: msg = "[ERROR] MARCXML is not valid." _log(msg) return _write(req, msg) + args = ['bibupload', "batchupload", arg_mode, filename] # run upload command if callback_url: - task_low_level_submission('bibupload', "batchupload", arg_mode, filename, "--callback-url", callback_url) - msg = "[INFO] %s %s %s %s %s" % ('bibupload', arg_mode, filename, "--callback-url", callback_url) - else: - task_low_level_submission('bibupload', "batchupload", arg_mode, filename) - msg = "[INFO] %s %s %s" % ('bibupload', arg_mode, filename) + args += ["--callback-url", callback_url] + if nonce: + args += ["--nonce", nonce] + task_low_level_submission(*args) + msg = "[INFO] %s" % ' '.join(args) _log(msg) return _write(req, msg) def metadata_upload(req, metafile=None, filetype=None, mode=None, exec_date=None, exec_time=None, metafilename=None, ln=CFG_SITE_LANG, priority="1"): """ Metadata web upload service. Get upload parameters and exec bibupload for the given file. Finally, write upload history. @return: tuple (error code, message) error code: code that indicates if an error ocurred message: message describing the error """ # start output: req.content_type = "text/html" req.send_http_header() error_codes = {'not_authorized': 1, 'invalid_marc': 2} # write temporary file: if filetype == 'marcxml': metafile = metafile.value else: metafile = _transform_input_to_marcxml(file_input=metafile.value) user_info = collect_user_info(req) tempfile.tempdir = CFG_TMPSHAREDDIR filename = tempfile.mktemp(prefix="batchupload_" + \ user_info['nickname'] + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_") filedesc = open(filename, 'w') filedesc.write(metafile) filedesc.close() # check if this client can run this file: if req is not None: allow = _check_client_can_submit_file(req=req, metafile=metafile, webupload=1, ln=ln) if allow[0] != 0: return (error_codes['not_authorized'], allow[1]) # check MARCXML validity if filetype == 'marcxml': # check validity of marcxml xmlmarclint_path = CFG_BINDIR + '/xmlmarclint' xmlmarclint_output, dummy1, dummy2 = run_shell_command('%s %s' % (xmlmarclint_path, filename)) if xmlmarclint_output != 0: msg = "[ERROR] MARCXML is not valid." 
return (error_codes['invalid_marc'], msg) # run upload command: if exec_date: date = exec_date if exec_time: date += ' ' + exec_time jobid = task_low_level_submission('bibupload', user_info['nickname'], mode, "--name=" + metafilename, "--priority=" + priority, "-t", date, filename) else: jobid = task_low_level_submission('bibupload', user_info['nickname'], mode, "--name=" + metafilename, "--priority=" + priority, filename) # write batch upload history run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate, filename, execdate, id_schTASK, batch_mode) VALUES (%s, NOW(), %s, %s, %s, "metadata")""", (user_info['nickname'], metafilename, exec_date != "" and (exec_date + ' ' + exec_time) or time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid), )) return (0, "Task %s queued" % str(jobid)) def document_upload(req=None, folder="", matching="", mode="", exec_date="", exec_time="", ln=CFG_SITE_LANG, priority="1"): """ Take files from the given directory and upload them with the appropiate mode. @parameters: + folder: Folder where the files to upload are stored + matching: How to match file names with record fields (report number, barcode,...) + mode: Upload mode (append, revise, replace) @return: tuple (file, error code) file: file name causing the error to notify the user error code: 1 - More than one possible recID, ambiguous behaviour 2 - No records match that file name 3 - File already exists """ import sys if sys.hexversion < 0x2060000: from md5 import md5 else: from hashlib import md5 from invenio.bibdocfile import BibRecDocs, file_strip_ext import shutil from invenio.search_engine import perform_request_search, \ search_pattern, \ guess_collection_of_a_record _ = gettext_set_language(ln) errors = [] info = [0, []] # Number of files read, name of the files try: files = os.listdir(folder) except OSError, error: errors.append(("", error)) return errors, info err_desc = {1: _("More than one possible recID, ambiguous behaviour"), 2: _("No records match that file name"), 3: _("File already exists"), 4: _("A file with the same name and format already exists"), 5: _("No rights to upload to collection '%s'")} # Create directory DONE/ if doesn't exist folder = (folder[-1] == "/") and folder or (folder + "/") files_done_dir = folder + "DONE/" try: os.mkdir(files_done_dir) except OSError: # Directory exists or no write permission pass for docfile in files: if os.path.isfile(os.path.join(folder, docfile)): info[0] += 1 identifier = file_strip_ext(docfile) extension = docfile[len(identifier):] rec_id = None if identifier: rec_id = search_pattern(p=identifier, f=matching, m='e') if not rec_id: errors.append((docfile, err_desc[2])) continue elif len(rec_id) > 1: errors.append((docfile, err_desc[1])) continue else: rec_id = str(list(rec_id)[0]) rec_info = BibRecDocs(rec_id) if rec_info.bibdocs: for bibdoc in rec_info.bibdocs: attached_files = bibdoc.list_all_files() file_md5 = md5(open(os.path.join(folder, docfile), "rb").read()).hexdigest() num_errors = len(errors) for attached_file in attached_files: if attached_file.checksum == file_md5: errors.append((docfile, err_desc[3])) break elif attached_file.fullname == docfile: errors.append((docfile, err_desc[4])) break if len(errors) > num_errors: continue # Check if user has rights to upload file if req is not None: file_collection = guess_collection_of_a_record(int(rec_id)) auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=file_collection) if auth_code != 0: error_msg = err_desc[5] % file_collection errors.append((docfile, 
error_msg)) continue tempfile.tempdir = CFG_TMPSHAREDDIR # Move document to be uploaded to temporary folder tmp_file = tempfile.mktemp(prefix=identifier + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_", suffix=extension) shutil.copy(os.path.join(folder, docfile), tmp_file) # Create MARC temporary file with FFT tag and call bibupload filename = tempfile.mktemp(prefix=identifier + '_') filedesc = open(filename, 'w') marc_content = """ %(rec_id)s %(name)s %(path)s """ % {'rec_id': rec_id, 'name': encode_for_xml(identifier), 'path': encode_for_xml(tmp_file), } filedesc.write(marc_content) filedesc.close() info[1].append(docfile) user = "" if req is not None: user_info = collect_user_info(req) user = user_info['nickname'] if not user: user = "batchupload" # Execute bibupload with the appropiate mode if exec_date: date = '--runtime=' + "\'" + exec_date + ' ' + exec_time + "\'" jobid = task_low_level_submission('bibupload', user, "--" + mode, "--name=" + docfile, "--priority=" + priority, date, filename) else: jobid = task_low_level_submission('bibupload', user, "--" + mode, "--name=" + docfile, "--priority=" + priority, filename) # write batch upload history run_sql("""INSERT INTO hstBATCHUPLOAD (user, submitdate, filename, execdate, id_schTASK, batch_mode) VALUES (%s, NOW(), %s, %s, %s, "document")""", (user_info['nickname'], docfile, exec_date != "" and (exec_date + ' ' + exec_time) or time.strftime("%Y-%m-%d %H:%M:%S"), str(jobid))) # Move file to DONE folder done_filename = docfile + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime()) + "_" + str(jobid) try: os.rename(os.path.join(folder, docfile), os.path.join(files_done_dir, done_filename)) except OSError: errors.append('MoveError') return errors, info def get_user_metadata_uploads(req): """Retrieve all metadata upload history information for a given user""" user_info = collect_user_info(req) upload_list = run_sql("""SELECT DATE_FORMAT(h.submitdate, '%%Y-%%m-%%d %%H:%%i:%%S'), \ h.filename, DATE_FORMAT(h.execdate, '%%Y-%%m-%%d %%H:%%i:%%S'), \ s.status \ FROM hstBATCHUPLOAD h INNER JOIN schTASK s \ ON h.id_schTASK = s.id \ WHERE h.user=%s and h.batch_mode="metadata" ORDER BY h.submitdate DESC""", (user_info['nickname'],)) return upload_list def get_user_document_uploads(req): """Retrieve all document upload history information for a given user""" user_info = collect_user_info(req) upload_list = run_sql("""SELECT DATE_FORMAT(h.submitdate, '%%Y-%%m-%%d %%H:%%i:%%S'), \ h.filename, DATE_FORMAT(h.execdate, '%%Y-%%m-%%d %%H:%%i:%%S'), \ s.status \ FROM hstBATCHUPLOAD h INNER JOIN schTASK s \ ON h.id_schTASK = s.id \ WHERE h.user=%s and h.batch_mode="document" ORDER BY h.submitdate DESC""", (user_info['nickname'],)) return upload_list def get_daemon_doc_files(): """ Return all files found in batchuploader document folders """ files = {} for folder in ['/revise', '/append']: try: daemon_dir = CFG_BATCHUPLOADER_DAEMON_DIR[0] == '/' and CFG_BATCHUPLOADER_DAEMON_DIR \ or CFG_PREFIX + '/' + CFG_BATCHUPLOADER_DAEMON_DIR directory = daemon_dir + '/documents' + folder files[directory] = [(filename, []) for filename in os.listdir(directory) if os.path.isfile(os.path.join(directory, filename))] for file_instance, info in files[directory]: stat_info = os.lstat(os.path.join(directory, file_instance)) info.append("%s" % pwd.getpwuid(stat_info.st_uid)[0]) # Owner info.append("%s" % grp.getgrgid(stat_info.st_gid)[0]) # Group info.append("%d" % stat_info.st_size) # Size time_stat = stat_info.st_mtime time_fmt = "%Y-%m-%d %R" 
info.append(time.strftime(time_fmt, time.gmtime(time_stat))) # Last modified except OSError: pass return files def get_daemon_meta_files(): """ Return all files found in batchuploader metadata folders """ files = {} for folder in ['/correct', '/replace', '/insert', '/append']: try: daemon_dir = CFG_BATCHUPLOADER_DAEMON_DIR[0] == '/' and CFG_BATCHUPLOADER_DAEMON_DIR \ or CFG_PREFIX + '/' + CFG_BATCHUPLOADER_DAEMON_DIR directory = daemon_dir + '/metadata' + folder files[directory] = [(filename, []) for filename in os.listdir(directory) if os.path.isfile(os.path.join(directory, filename))] for file_instance, info in files[directory]: stat_info = os.lstat(os.path.join(directory, file_instance)) info.append("%s" % pwd.getpwuid(stat_info.st_uid)[0]) # Owner info.append("%s" % grp.getgrgid(stat_info.st_gid)[0]) # Group info.append("%d" % stat_info.st_size) # Size time_stat = stat_info.st_mtime time_fmt = "%Y-%m-%d %R" info.append(time.strftime(time_fmt, time.gmtime(time_stat))) # Last modified except OSError: pass return files def check_date(date): """ Check if date is correct @return: 0 - Default or correct date 3 - Incorrect format 4 - Date does not exist """ if not date or date == "yyyy-mm-dd": return 0 correct_format = re.match("2[01]\d\d-[01]?\d-[0-3]?\d", date) if not correct_format: return 3 #separate year, month, day date = correct_format.group(0).split("-") try: calendar.weekday(int(date[0]), int(date[1]), int(date[2])) except ValueError: return 4 return 0 def check_time(time): """ Check if time is correct @return: 0 - Default or correct time 1 - Incorrect format """ if not time or time == "hh:mm:ss": return 0 correct_format = re.match("[0-2]\d:[0-5]\d:[0-5]\d", time) if not correct_format: return 1 return 0 def user_authorization(req, ln): """ Check user authorization to visit page """ _ = gettext_set_language(ln) user_info = collect_user_info(req) auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader') if auth_code != 0: referer = '/batchuploader/' if user_info['email'] == 'guest': error_msg = _("Guests are not authorized to run batchuploader") else: error_msg = _("The user '%s' is not authorized to run batchuploader" % \ (cgi.escape(user_info['nickname']))) return page_not_authorized(req=req, referer=referer, text=auth_message, navmenuid="batchuploader") else: return None def perform_basic_upload_checks(xml_record): """ Performs tests that would provoke the bibupload task to fail with an exit status 1, to prevent batchupload from crashing while alarming the user wabout the issue """ from bibupload import writing_rights_p errors = [] if not writing_rights_p(): errors.append("Error: BibUpload does not have rights to write fulltext files.") recs = create_records(xml_record, 1, 1) if recs == []: errors.append("Error: Cannot parse MARCXML file.") elif recs[0][0] is None: errors.append("Error: MARCXML file has wrong format: %s" % recs) return errors def perform_upload_check(xml_record, mode): """ Performs a upload simulation with the given record and mode @return: string describing errors @rtype: string """ error_cache = [] def my_writer(msg, stream=sys.stdout, verbose=1): if verbose == 1: if 'DONE' not in msg: error_cache.append(msg.strip()) orig_writer = bibupload_module.write_message bibupload_module.write_message = my_writer error_cache.extend(perform_basic_upload_checks(xml_record)) if error_cache: # There has been some critical error return '\n'.join(error_cache) recs = xml_marc_to_records(xml_record) try: upload_mode = mode[2:] # Adapt input data for bibupload 
function if upload_mode == "r insert-or-replace": upload_mode = "replace_or_insert" for record in recs: if record: record_strip_empty_volatile_subfields(record) record_strip_empty_fields(record) bibupload(record, opt_mode=upload_mode, pretend=True) finally: bibupload_module.write_message = orig_writer return '\n'.join(error_cache) +def _get_useragent(req): + """Return client user agent from req object.""" + user_info = collect_user_info(req) + return user_info['agent'] + def _get_client_ip(req): """Return client IP address from req object.""" return str(req.remote_ip) def _check_client_ip(req): """ Is this client permitted to use the service? """ client_ip = _get_client_ip(req) if client_ip in CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS.keys(): return True return False def _check_client_useragent(req): """ Is this user agent permitted to use the service? """ - user_info = collect_user_info(req) - client_useragent = user_info['agent'] - if client_useragent in CFG_BATCHUPLOADER_WEB_ROBOT_AGENT: + client_useragent = _get_useragent(req) + if _CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS_RE.match(client_useragent): return True return False def _check_client_can_submit_file(client_ip="", metafile="", req=None, webupload=0, ln=CFG_SITE_LANG): """ Is this client able to upload such a FILENAME? check 980 $a values and collection tags in the file to see if they are among the permitted ones as specified by CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS and ACC_AUTHORIZE_ACTION. Useful to make sure that the client does not override other records by mistake. """ from invenio.bibrecord import create_records _ = gettext_set_language(ln) recs = create_records(metafile, 0, 0) user_info = collect_user_info(req) filename_tag980_values = _detect_980_values_from_marcxml_file(recs) for filename_tag980_value in filename_tag980_values: if not filename_tag980_value: if not webupload: return False else: return(1, "Invalid collection in tag 980") if not webupload: if not filename_tag980_value in CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS[client_ip]: return False else: auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=filename_tag980_value) if auth_code != 0: error_msg = _("The user '%(x_user)s' is not authorized to modify collection '%(x_coll)s'") % \ {'x_user': user_info['nickname'], 'x_coll': filename_tag980_value} return (auth_code, error_msg) filename_rec_id_collections = _detect_collections_from_marcxml_file(recs) for filename_rec_id_collection in filename_rec_id_collections: if not webupload: if not filename_rec_id_collection in CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS[client_ip]: return False else: auth_code, auth_message = acc_authorize_action(req, 'runbatchuploader', collection=filename_rec_id_collection) if auth_code != 0: error_msg = _("The user '%(x_user)s' is not authorized to modify collection '%(x_coll)s'") % \ {'x_user': user_info['nickname'], 'x_coll': filename_rec_id_collection} return (auth_code, error_msg) if not webupload: return True else: return (0, " ") def _detect_980_values_from_marcxml_file(recs): """ Read MARCXML file and return list of 980 $a values found in that file. Useful for checking rights. 
""" from invenio.bibrecord import record_get_field_values collection_tag = run_sql("SELECT value FROM tag, field_tag, field \ WHERE tag.id=field_tag.id_tag AND \ field_tag.id_field=field.id AND \ field.code='collection'") collection_tag = collection_tag[0][0] dbcollids = {} for rec, dummy1, dummy2 in recs: if rec: for tag980 in record_get_field_values(rec, tag=collection_tag[:3], ind1=collection_tag[3], ind2=collection_tag[4], code=collection_tag[5]): dbcollids[tag980] = 1 return dbcollids.keys() def _detect_collections_from_marcxml_file(recs): """ Extract all possible recIDs from MARCXML file and guess collections for these recIDs. """ from invenio.bibrecord import record_get_field_values from invenio.search_engine import guess_collection_of_a_record from invenio.bibupload import find_record_from_sysno, \ find_records_from_extoaiid, \ find_record_from_oaiid dbcollids = {} sysno_tag = CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG oaiid_tag = CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG oai_tag = CFG_OAI_ID_FIELD for rec, dummy1, dummy2 in recs: if rec: for tag001 in record_get_field_values(rec, '001'): collection = guess_collection_of_a_record(int(tag001)) dbcollids[collection] = 1 for tag_sysno in record_get_field_values(rec, tag=sysno_tag[:3], ind1=sysno_tag[3], ind2=sysno_tag[4], code=sysno_tag[5]): record = find_record_from_sysno(tag_sysno) if record: collection = guess_collection_of_a_record(int(record)) dbcollids[collection] = 1 for tag_oaiid in record_get_field_values(rec, tag=oaiid_tag[:3], ind1=oaiid_tag[3], ind2=oaiid_tag[4], code=oaiid_tag[5]): try: records = find_records_from_extoaiid(tag_oaiid) except Error: records = [] if records: record = records.pop() collection = guess_collection_of_a_record(int(record)) dbcollids[collection] = 1 for tag_oai in record_get_field_values(rec, tag=oai_tag[0:3], ind1=oai_tag[3], ind2=oai_tag[4], code=oai_tag[5]): record = find_record_from_oaiid(tag_oai) if record: collection = guess_collection_of_a_record(int(record)) dbcollids[collection] = 1 return dbcollids.keys() def _transform_input_to_marcxml(file_input=""): """ Takes text-marc as input and transforms it to MARCXML. """ # Create temporary file to read from tmp_fd, filename = tempfile.mkstemp(dir=CFG_TMPSHAREDDIR) os.write(tmp_fd, file_input) os.close(tmp_fd) try: # Redirect output, transform, restore old references old_stdout = sys.stdout new_stdout = StringIO() sys.stdout = new_stdout transform_file(filename) finally: sys.stdout = old_stdout return new_stdout.getvalue() def _log(msg, logfile="webupload.log"): """ Log MSG into LOGFILE with timestamp. """ filedesc = open(CFG_LOGDIR + "/" + logfile, "a") filedesc.write(time.strftime("%Y-%m-%d %H:%M:%S") + " --> " + msg + "\n") filedesc.close() return def _write(req, msg): """ Write MSG to the output stream for the end user. """ req.write(msg + "\n") return diff --git a/modules/bibupload/lib/batchuploader_regression_tests.py b/modules/bibupload/lib/batchuploader_regression_tests.py new file mode 100644 index 000000000..1ace7d995 --- /dev/null +++ b/modules/bibupload/lib/batchuploader_regression_tests.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +## +## This file is part of Invenio. +## Copyright (C) 2012 CERN. +## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. 
+## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +# pylint: disable=C0301 + +"""Regression tests for the BatchUploader.""" + +import unittest +import os +import os.path +import urllib2 +import urlparse +import socket +from urllib import urlencode + +from invenio.testutils import make_test_suite, run_test_suite +from invenio.dbquery import run_sql +from invenio.jsonutils import json +from invenio.config import CFG_DEVEL_SITE, CFG_SITE_URL, CFG_TMPDIR, CFG_BINDIR +from invenio.bibsched import get_last_taskid, delete_task +from invenio.shellutils import run_shell_command +from invenio.bibupload_regression_tests import GenericBibUploadTest +from invenio.urlutils import make_user_agent_string +from invenio.search_engine import get_record + +CFG_HAS_CURL = os.path.exists("/usr/bin/curl") + +## NOTE: default invenio.conf authorization are granted only to 127.0.0.1 +## or 127.0.1.1, a.k.a. localhost, so the following checks if the current host +## is well recognized as localhost. Otherwise disable tests since they would +## fail due to not enough authorizations. +CFG_LOCALHOST_OK = socket.gethostbyname(urlparse.urlparse(CFG_SITE_URL)[1].split(':')[0]) in ('127.0.0.1', '127.0.1.1') + +class BatchUploaderRobotUploadTests(GenericBibUploadTest): + """ + Testing Class for robotupload + """ + def setUp(self): + GenericBibUploadTest.setUp(self) + self.callback_result_path = os.path.join(CFG_TMPDIR, 'robotupload.json') + self.callback_url = CFG_SITE_URL + '/httptest/post2?%s' % urlencode({ + "save": self.callback_result_path}) + if os.path.exists(self.callback_result_path): + os.remove(self.callback_result_path) + self.last_taskid = get_last_taskid() + self.marcxml = """\ + + + Doe, John + + + The title + + + TEST + +""" + self.req = urllib2.Request(CFG_SITE_URL + '/batchuploader/robotupload/insert') + self.req.add_header('Content-Type', 'application/marcxml+xml') + self.req.add_header('User-Agent', make_user_agent_string('BatchUploader')) + self.req.add_data(self.marcxml) + self.req_callback = urllib2.Request(CFG_SITE_URL + '/batchuploader/robotupload/insert?' + urlencode({ + 'callback_url': self.callback_url})) + self.req_callback.add_header('Content-Type', 'application/marcxml+xml') + self.req_callback.add_header('User-Agent', 'invenio_webupload') + self.req_callback.add_data(self.marcxml) + self.nonce_url = CFG_SITE_URL + '/batchuploader/robotupload/insert?' 
+ urlencode({ + 'nonce': "1234", + 'callback_url': self.callback_url}) + self.req_nonce = urllib2.Request(self.nonce_url) + self.req_nonce.add_header('Content-Type', 'application/marcxml+xml') + self.req_nonce.add_header('User-Agent', 'invenio_webupload') + self.req_nonce.add_data(self.marcxml) + self.legacy_url = CFG_SITE_URL + '/batchuploader/robotupload' + + def tearDown(self): + GenericBibUploadTest.tearDown(self) + if os.path.exists(self.callback_result_path): + os.remove(self.callback_result_path) + current_task = get_last_taskid() + if current_task != self.last_taskid: + delete_task(current_task) + + if CFG_LOCALHOST_OK: + def test_bad_marcxml(self): + """batchuploader - robotupload bad MARCXML""" + self.req.add_data("BLABLA") + result = urllib2.urlopen(self.req).read() + self.assertEqual(result, "[ERROR] MARCXML is not valid.\n") + + if CFG_LOCALHOST_OK: + def test_bad_agent(self): + """batchuploader - robotupload bad agent""" + self.req.add_header('User-Agent', 'badagent') + result = urllib2.urlopen(self.req).read() + self.assertEqual(result, "[ERROR] Sorry, the badagent useragent cannot use the service.\n") + + if CFG_LOCALHOST_OK: + def test_simple_insert(self): + """batchuploader - robotupload simple insert""" + result = urllib2.urlopen(self.req).read() + self.failUnless("[INFO]" in result) + current_task = get_last_taskid() + run_shell_command("%s/bibupload %%s" % CFG_BINDIR, [str(current_task)]) + current_recid = run_sql("SELECT MAX(id) FROM bibrec")[0][0] + self.failIfEqual(self.last_recid, current_recid) + record = get_record(current_recid) + self.assertEqual(record['245'][0][0], [('a', 'The title')]) + + if CFG_DEVEL_SITE and CFG_LOCALHOST_OK: + ## This expect a particular testing web handler that is available + ## only when CFG_DEVEL_SITE is set up correctly + def test_insert_with_callback(self): + """batchuploader - robotupload insert with callback""" + result = urllib2.urlopen(self.req_callback).read() + self.failUnless("[INFO]" in result, '"%s" did not contained [INFO]' % result) + current_task = get_last_taskid() + run_shell_command("%s/bibupload %%s" % CFG_BINDIR, [str(current_task)]) + results = json.loads(open(self.callback_result_path).read()) + self.failUnless('results' in results) + self.assertEqual(len(results['results']), 1) + self.failUnless(results['results'][0]['success']) + self.failUnless(results['results'][0]['recid'] > 0) + self.failUnless("""Doe, John""" in results['results'][0]['marcxml'], results['results'][0]['marcxml']) + + def test_insert_with_nonce(self): + """batchuploader - robotupload insert with nonce""" + result = urllib2.urlopen(self.req_nonce).read() + self.failUnless("[INFO]" in result) + current_task = get_last_taskid() + run_shell_command("%s/bibupload %%s" % CFG_BINDIR, [str(current_task)]) + results = json.loads(open(self.callback_result_path).read()) + self.failUnless('results' in results, '"%s" did not contained [INFO]' % result) + self.assertEqual(len(results['results']), 1) + self.assertEqual(results['nonce'], "1234") + self.failUnless(results['results'][0]['success']) + self.failUnless(results['results'][0]['recid'] > 0) + self.failUnless("""Doe, John""" in results['results'][0]['marcxml'], results['results'][0]['marcxml']) + + if CFG_HAS_CURL: + def test_insert_via_curl(self): + """batchuploader - robotupload insert via CLI curl""" + curl_input_file = os.path.join(CFG_TMPDIR, 'curl_test.xml') + open(curl_input_file, "w").write(self.marcxml) + try: + result = run_shell_command('/usr/bin/curl -T %s %s -A %s -H "Content-Type: 
application/marcxml+xml"', [curl_input_file, self.nonce_url, make_user_agent_string('BatchUploader')])[1] + self.failUnless("[INFO]" in result) + current_task = get_last_taskid() + run_shell_command("%s/bibupload %%s" % CFG_BINDIR, [str(current_task)]) + results = json.loads(open(self.callback_result_path).read()) + self.failUnless('results' in results, '"%s" did not contained [INFO]' % result) + self.assertEqual(len(results['results']), 1) + self.assertEqual(results['nonce'], "1234") + self.failUnless(results['results'][0]['success']) + self.failUnless(results['results'][0]['recid'] > 0) + self.failUnless("""Doe, John""" in results['results'][0]['marcxml'], results['results'][0]['marcxml']) + finally: + os.remove(curl_input_file) + + def test_legacy_insert_via_curl(self): + curl_input_file = os.path.join(CFG_TMPDIR, 'curl_test.xml') + open(curl_input_file, "w").write(self.marcxml) + try: + ## curl -F 'file=@localfile.xml' -F 'mode=-i' [-F 'callback_url=http://...'] [-F 'nonce=1234'] http://cdsweb.cern.ch/batchuploader/robotupload -A invenio_webupload + code, result, err = run_shell_command("/usr/bin/curl -v -F file=@%s -F 'mode=-i' -F callback_url=%s -F nonce=1234 %s -A %s", [curl_input_file, self.callback_url, self.legacy_url, make_user_agent_string('BatchUploader')]) + self.failUnless("[INFO]" in result, '[INFO] not find in results: %s, %s' % (result, err)) + current_task = get_last_taskid() + run_shell_command("%s/bibupload %%s" % CFG_BINDIR, [str(current_task)]) + results = json.loads(open(self.callback_result_path).read()) + self.failUnless('results' in results, '"%s" did not contained [INFO]' % result) + self.assertEqual(len(results['results']), 1) + self.assertEqual(results['nonce'], "1234") + self.failUnless(results['results'][0]['success']) + self.failUnless(results['results'][0]['recid'] > 0) + self.failUnless("""Doe, John""" in results['results'][0]['marcxml'], results['results'][0]['marcxml']) + finally: + os.remove(curl_input_file) + + +TEST_SUITE = make_test_suite(BatchUploaderRobotUploadTests) + + +if __name__ == "__main__": + run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/bibupload/lib/batchuploader_webinterface.py b/modules/bibupload/lib/batchuploader_webinterface.py index d2c8b4843..c835e2368 100644 --- a/modules/bibupload/lib/batchuploader_webinterface.py +++ b/modules/bibupload/lib/batchuploader_webinterface.py @@ -1,303 +1,322 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""WebUpload web interface""" __revision__ = "$Id$" __lastupdated__ = """$Date$""" from invenio.config import CFG_SITE_SECURE_URL from invenio.urlutils import redirect_to_url from invenio.messages import gettext_set_language from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory +from invenio.webinterface_handler_config import SERVER_RETURN, HTTP_NOT_FOUND +from invenio.webinterface_handler_wsgi_utils import handle_file_post from invenio.webuser import getUid, page_not_authorized from invenio.webpage import page from invenio.batchuploader_engine import metadata_upload, cli_upload, \ get_user_metadata_uploads, get_user_document_uploads, document_upload, \ get_daemon_doc_files, get_daemon_meta_files, cli_allocate_record, \ check_date, check_time, user_authorization try: import invenio.template batchuploader_templates = invenio.template.load('batchuploader') except: pass class WebInterfaceBatchUploaderPages(WebInterfaceDirectory): """Defines the set of /batchuploader pages.""" - _exports = ['', 'metadata', 'robotupload', 'metasubmit', 'history', 'documents', 'docsubmit', 'daemon', 'allocaterecord'] + _exports = ['', 'metadata', 'metasubmit', 'history', 'documents', 'docsubmit', 'daemon', 'allocaterecord'] + + def _lookup(self, component, path): + def restupload(req, form): + """Interface for robots used like this: + $ curl --data-binary '@localfile.xml' http://cdsweb.cern.ch/batchuploader/robotupload/[insert|replace|correct|append]?[callback_url=http://...]&nonce=1234 -A invenio_webupload + """ + filepath, mimetype = handle_file_post(req) + argd = wash_urlargd(form, {'callback_url': (str, None), 'nonce': (str, None)}) + return cli_upload(req, open(filepath), '--' + path[0], argd['callback_url'], argd['nonce']) + + def legacyrobotupload(req, form): + """Interface for robots used like this: + $ curl -F 'file=@localfile.xml' -F 'mode=-i' [-F 'callback_url=http://...'] [-F 'nonce=1234'] http://cdsweb.cern.ch/batchuploader/robotupload -A invenio_webupload + """ + argd = wash_urlargd(form, {'mode': (str, None), 'callback_url': (str, None), 'nonce': (str, None)}) + return cli_upload(req, form.get('file', None), argd['mode'], argd['callback_url'], argd['nonce']) + + if component == 'robotupload': + if path and path[0] in ('insert', 'replace', 'correct', 'append'): + return restupload, None + else: + return legacyrobotupload, None + else: + return None, path def index(self, req, form): """ The function called by default """ redirect_to_url(req, "%s/batchuploader/metadata" % (CFG_SITE_SECURE_URL)) def metadata(self, req, form): """ Display Metadata file upload form """ argd = wash_urlargd(form, {'error': (int, 0), 'filetype': (str, ""), 'mode': (str, ""), 'submit_date': (str, "yyyy-mm-dd"), 'submit_time': (str, "hh:mm:ss")}) _ = gettext_set_language(argd['ln']) not_authorized = user_authorization(req, argd['ln']) if not_authorized: return not_authorized uid = getUid(req) body = batchuploader_templates.tmpl_display_menu(argd['ln'], ref="metadata") body += batchuploader_templates.tmpl_display_web_metaupload_form(argd['ln'], argd['error'], argd['filetype'], argd['mode'], argd['submit_date'], argd['submit_time']) title = _("Metadata batch upload") return page(title = title, body = body, metaheaderadd = batchuploader_templates.tmpl_styles(), uid = uid, lastupdated = __lastupdated__, req = req, language = argd['ln'], navmenuid = "batchuploader") def documents(self, req, form): """ Display document upload form """ argd = wash_urlargd(form, { }) _ = gettext_set_language(argd['ln']) 
not_authorized = user_authorization(req, argd['ln']) if not_authorized: return not_authorized uid = getUid(req) body = batchuploader_templates.tmpl_display_menu(argd['ln'], ref="documents") body += batchuploader_templates.tmpl_display_web_docupload_form(argd['ln']) title = _("Document batch upload") return page(title = title, body = body, metaheaderadd = batchuploader_templates.tmpl_styles(), uid = uid, lastupdated = __lastupdated__, req = req, language = argd['ln'], navmenuid = "batchuploader") def docsubmit(self, req, form): """ Function called after submitting the document upload form. Performs the appropiate action depending on the input parameters """ argd = wash_urlargd(form, {'docfolder': (str, ""), 'matching': (str, ""), 'mode': (str, ""), 'submit_date': (str, ""), 'submit_time': (str, ""), 'priority': (str, "")}) _ = gettext_set_language(argd['ln']) not_authorized = user_authorization(req, argd['ln']) if not_authorized: return not_authorized #Check if input fields are correct, if not, redirect to upload form correct_date = check_date(argd['submit_date']) correct_time = check_time(argd['submit_time']) if correct_time != 0: redirect_to_url(req, "%s/batchuploader/documents?error=1&mode=%s&docfolder=%s&matching=%s&submit_date=%s" % (CFG_SITE_SECURE_URL, argd['mode'], argd['docfolder'], argd['matching'], argd['submit_date'])) if correct_date != 0: redirect_to_url(req, "%s/batchuploader/documents?error=%s&mode=%s&docfolder=%s&matching=%s&submit_time=%s" % (CFG_SITE_SECURE_URL, correct_date, argd['mode'], argd['docfolder'], argd['matching'], argd['submit_time'])) date = argd['submit_date'] not in ['yyyy-mm-dd', ''] \ and argd['submit_date'] or '' time = argd['submit_time'] not in ['hh:mm:ss', ''] \ and argd['submit_time'] or '' if date != '' and time == '': redirect_to_url(req, "%s/batchuploader/documents?error=1&mode=%s&docfolder=%s&matching=%s&submit_date=%s" % (CFG_SITE_SECURE_URL, argd['mode'], argd['docfolder'], argd['matching'], argd['submit_date'])) elif date == '' and time != '': redirect_to_url(req, "%s/batchuploader/documents?error=4&mode=%s&docfolder=%s&matching=%s&submit_time=%s" % (CFG_SITE_SECURE_URL, argd['mode'], argd['docfolder'], argd['matching'], argd['submit_time'])) errors, info = document_upload(req, argd['docfolder'], argd['matching'], argd['mode'], date, time, argd['ln'], argd['priority']) body = batchuploader_templates.tmpl_display_menu(argd['ln']) uid = getUid(req) navtrail = '''%s''' % \ (CFG_SITE_SECURE_URL, _("Document batch upload")) body += batchuploader_templates.tmpl_display_web_docupload_result(argd['ln'], errors, info) title = _("Document batch upload result") return page(title = title, body = body, metaheaderadd = batchuploader_templates.tmpl_styles(), uid = uid, navtrail = navtrail, lastupdated = __lastupdated__, req = req, language = argd['ln'], navmenuid = "batchuploader") - def robotupload(self, req, form): - """Interface for robots used like this: - $ curl -F 'file=@localfile.xml' -F 'mode=-i' [-F 'callback_url=http://...' http://cdsweb.cern.ch/batchuploader/robotupload] -A invenio_webupload - """ - argd = wash_urlargd(form, {'mode': (str, None), 'callback_url': (str, None)}) - cli_upload(req, form.get('file', None), argd['mode'], argd['callback_url']) - def allocaterecord(self, req, form): """ Interface for robots to allocate a record and obtain a record identifier """ return cli_allocate_record(req) def metasubmit(self, req, form): """ Function called after submitting the metadata upload form. Checks if input fields are correct before uploading. 
""" argd = wash_urlargd(form, {'filetype': (str, None), 'mode': (str, None), 'submit_date': (str, None), 'submit_time': (str, None), 'filename': (str, None), 'priority': (str, None)}) _ = gettext_set_language(argd['ln']) not_authorized = user_authorization(req, argd['ln']) if not_authorized: return not_authorized #Check if input fields are correct, if not, redirect to upload form correct_date = check_date(argd['submit_date']) correct_time = check_time(argd['submit_time']) if correct_time != 0: redirect_to_url(req, "%s/batchuploader/metadata?error=1&filetype=%s&mode=%s&submit_date=%s" % (CFG_SITE_SECURE_URL, argd['filetype'], argd['mode'], argd['submit_date'])) if not form.get('metafile', None) or not form.get('metafile', None).value: # Empty file redirect_to_url(req, "%s/batchuploader/metadata?error=2&filetype=%s&mode=%s&submit_date=%s&submit_time=%s" % (CFG_SITE_SECURE_URL, argd['filetype'], argd['mode'], argd['submit_date'], argd['submit_time'])) if correct_date != 0: redirect_to_url(req, "%s/batchuploader/metadata?error=%s&filetype=%s&mode=%s&submit_time=%s" % (CFG_SITE_SECURE_URL, correct_date, argd['filetype'], argd['mode'], argd['submit_time'])) date = argd['submit_date'] not in ['yyyy-mm-dd', ''] \ and argd['submit_date'] or '' time = argd['submit_time'] not in ['hh:mm:ss', ''] \ and argd['submit_time'] or '' if date != '' and time == '': redirect_to_url(req, "%s/batchuploader/metadata?error=1&filetype=%s&mode=%s&submit_date=%s" % (CFG_SITE_SECURE_URL, argd['filetype'], argd['mode'], argd['submit_date'])) elif date == '' and time != '': redirect_to_url(req, "%s/batchuploader/metadata?error=4&filetype=%s&mode=%s&submit_time=%s" % (CFG_SITE_SECURE_URL, argd['filetype'], argd['mode'], argd['submit_time'])) #Function where bibupload queues the file auth_code, auth_message = metadata_upload(req, form.get('metafile', None), argd['filetype'], argd['mode'].split()[0], date, time, argd['filename'], argd['ln'], argd['priority']) if auth_code == 1: # not authorized referer = '/batchuploader/' return page_not_authorized(req=req, referer=referer, text=auth_message, navmenuid="batchuploader") else: uid = getUid(req) body = batchuploader_templates.tmpl_display_menu(argd['ln']) if auth_code == 2: # invalid MARCXML body += batchuploader_templates.tmpl_invalid_marcxml(argd['ln']) title = _("Invalid MARCXML") else: body += batchuploader_templates.tmpl_upload_successful(argd['ln']) title = _("Upload successful") navtrail = '''%s''' % \ (CFG_SITE_SECURE_URL, _("Metadata batch upload")) return page(title = title, body = body, uid = uid, navtrail = navtrail, lastupdated = __lastupdated__, req = req, language = argd['ln'], navmenuid = "batchuploader") def history(self, req, form): """Display upload history of the current user""" argd = wash_urlargd(form, {}) _ = gettext_set_language(argd['ln']) not_authorized = user_authorization(req, argd['ln']) if not_authorized: return not_authorized uploaded_meta_files = get_user_metadata_uploads(req) uploaded_doc_files = get_user_document_uploads(req) uid = getUid(req) body = batchuploader_templates.tmpl_display_menu(argd['ln'], ref="history") body += batchuploader_templates.tmpl_upload_history(argd['ln'], uploaded_meta_files, uploaded_doc_files) title = _("Upload history") return page(title = title, body = body, metaheaderadd = batchuploader_templates.tmpl_styles(), uid = uid, lastupdated = __lastupdated__, req = req, language = argd['ln'], navmenuid = "batchuploader") def daemon(self, req, form): """ Display content of folders where the daemon will look into """ 
argd = wash_urlargd(form, {}) _ = gettext_set_language(argd['ln']) not_authorized = user_authorization(req, argd['ln']) if not_authorized: return not_authorized docs = get_daemon_doc_files() metadata = get_daemon_meta_files() uid = getUid(req) body = batchuploader_templates.tmpl_display_menu(argd['ln'], ref="daemon") body += batchuploader_templates.tmpl_daemon_content(argd['ln'], docs, metadata) title = _("Batch Uploader: Daemon monitor") return page(title = title, body = body, metaheaderadd = batchuploader_templates.tmpl_styles(), uid = uid, lastupdated = __lastupdated__, req = req, language = argd['ln'], navmenuid = "batchuploader") def __call__(self, req, form): """Redirect calls without final slash.""" redirect_to_url(req, '%s/batchuploader/metadata' % CFG_SITE_SECURE_URL) diff --git a/modules/bibupload/lib/bibupload.py b/modules/bibupload/lib/bibupload.py index 83477f4ab..d9f3ab11d 100644 --- a/modules/bibupload/lib/bibupload.py +++ b/modules/bibupload/lib/bibupload.py @@ -1,2233 +1,2242 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibUpload: Receive MARC XML file and update the appropriate database tables according to options. 
""" __revision__ = "$Id$" import os import re import sys import time from datetime import datetime from zlib import compress import socket import marshal import copy import tempfile import urlparse import urllib2 from invenio.config import CFG_OAI_ID_FIELD, \ CFG_BIBUPLOAD_REFERENCE_TAG, \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \ CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG, \ CFG_BIBUPLOAD_STRONG_TAGS, \ CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS, \ CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE, \ CFG_BIBUPLOAD_DELETE_FORMATS, \ CFG_SITE_URL, CFG_SITE_RECORD, \ CFG_OAI_PROVENANCE_ALTERED_SUBFIELD from invenio.jsonutils import json, CFG_JSON_AVAILABLE from invenio.bibupload_config import CFG_BIBUPLOAD_CONTROLFIELD_TAGS, \ CFG_BIBUPLOAD_SPECIAL_TAGS from invenio.dbquery import run_sql, \ Error from invenio.bibrecord import create_records, \ record_add_field, \ record_delete_field, \ record_xml_output, \ record_get_field_instances, \ record_get_field_value, \ record_get_field_values, \ field_get_subfield_values, \ field_get_subfield_instances, \ record_modify_subfield, \ record_delete_subfield_from, \ record_delete_fields, \ record_add_subfield_into, \ record_find_field, \ record_extract_oai_id from invenio.search_engine import get_record from invenio.dateutils import convert_datestruct_to_datetext from invenio.errorlib import register_exception from invenio.intbitset import intbitset +from invenio.urlutils import make_user_agent_string from invenio.config import CFG_WEBSUBMIT_FILEDIR from invenio.bibtask import task_init, write_message, \ task_set_option, task_get_option, task_get_task_param, task_update_status, \ task_update_progress, task_sleep_now_if_required, fix_argv_paths from invenio.bibdocfile import BibRecDocs, file_strip_ext, normalize_format, \ get_docname_from_url, check_valid_url, download_url, \ KEEP_OLD_VALUE, decompose_bibdocfile_url, InvenioWebSubmitFileError, \ bibdocfile_url_p, CFG_BIBDOCFILE_AVAILABLE_FLAGS, guess_format_from_url from invenio.search_engine import search_pattern #Statistic variables stat = {} stat['nb_records_to_upload'] = 0 stat['nb_records_updated'] = 0 stat['nb_records_inserted'] = 0 stat['nb_errors'] = 0 stat['nb_holdingpen'] = 0 stat['exectime'] = time.localtime() _WRITING_RIGHTS = None ## Let's set a reasonable timeout for URL request (e.g. FFT) socket.setdefaulttimeout(40) _re_find_001 = re.compile('\\s*(\\d*)\\s*', re.S) def bibupload_pending_recids(): """This function embed a bit of A.I. and is more a hack than an elegant algorithm. It should be updated in case bibupload/bibsched are modified in incompatible ways. This function return the intbitset of all the records that are being (or are scheduled to be) touched by other bibuploads. """ options = run_sql("""SELECT arguments FROM schTASK WHERE status<>'DONE' AND proc='bibupload' AND (status='RUNNING' OR status='CONTINUING' OR status='WAITING' OR status='SCHEDULED' OR status='ABOUT TO STOP' OR status='ABOUT TO SLEEP')""") ret = intbitset() xmls = [] if options: for arguments in options: arguments = marshal.loads(arguments[0]) for argument in arguments[1:]: if argument.startswith('/'): # XMLs files are recognizable because they're absolute # files... 
                    xmls.append(argument)
        for xmlfile in xmls:
            # Let's grep for the 001
            try:
                xml = open(xmlfile).read()
                ret += [int(group[1]) for group in _re_find_001.findall(xml)]
            except:
                continue
    return ret

### bibupload engine functions:

def bibupload(record, opt_tag=None, opt_mode=None,
        opt_stage_to_start_from=1, opt_notimechange=0, oai_rec_id="",
        pretend=False):
    """Main function: process a record and fit it in the tables
    bibfmt, bibrec, bibrec_bibxxx, bibxxx with proper record
    metadata.

    Return (error_code, recID, msg) of the processed record.
    """
    assert(opt_mode in ('insert', 'replace', 'replace_or_insert', 'reference',
        'correct', 'append', 'format', 'holdingpen', 'delete'))
    error = None
    now = datetime.now() # will hold record creation/modification date

    # If a special tag to process was given, check that it exists in the record
    if opt_tag is not None and not(record.has_key(opt_tag)):
        msg = "   Failed: Tag not found, enter a valid tag to update."
        write_message(msg, verbose=1, stream=sys.stderr)
        return (1, -1, msg)

    # Extraction of the Record Id from 001, SYSNO or OAIID tags:
    rec_id = retrieve_rec_id(record, opt_mode, pretend=pretend)
    if rec_id == -1:
        msg = "   Failed: either the record already exists and insert was " \
            "requested or the record does not exist and " \
            "replace/correct/append has been used"
        write_message(msg, verbose=1, stream=sys.stderr)
        return (1, -1, msg)
    elif rec_id > 0:
        write_message("   -Retrieve record ID (found %s): DONE." % rec_id,
            verbose=2)
        if not record.has_key('001'):
            # Found record ID by means of SYSNO or OAIID, and the
            # input MARCXML buffer does not have this 001 tag, so we
            # should add it now:
            error = record_add_field(record, '001', controlfield_value=rec_id)
            if error is None:
                msg = "   Failed: Error during adding the 001 controlfield " \
                    "to the record"
                write_message(msg, verbose=1, stream=sys.stderr)
                return (1, int(rec_id), msg)
            else:
                error = None
            write_message("   -Added tag 001: DONE.", verbose=2)
    write_message("   -Check if the xml marc file is already in the database: DONE",
        verbose=2)

    # Reference mode: check if there are reference tags
    if opt_mode == 'reference':
        error = extract_tag_from_record(record, CFG_BIBUPLOAD_REFERENCE_TAG)
        if error is None:
            msg = "   Failed: No reference tags have been found..."
write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) else: error = None write_message(" -Check if reference tags exist: DONE", verbose=2) record_deleted_p = False if opt_mode == 'insert' or \ (opt_mode == 'replace_or_insert') and rec_id is None: insert_mode_p = True # Insert the record into the bibrec databases to have a recordId rec_id = create_new_record(pretend=pretend) write_message(" -Creation of a new record id (%d): DONE" % rec_id, verbose=2) # we add the record Id control field to the record error = record_add_field(record, '001', controlfield_value=rec_id) if error is None: msg = " Failed: Error during adding the 001 controlfield " \ "to the record" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: error = None error = record_add_field(record, '005', controlfield_value=now.strftime("%Y%m%d%H%M%S.0")) if error is None: write_message(" Failed: Error during adding to 005 controlfield to record",verbose=1,stream=sys.stderr) return (1, int(rec_id)) else: error=None elif opt_mode != 'insert' and opt_mode != 'format' and \ opt_stage_to_start_from != 5: insert_mode_p = False # Update Mode # Retrieve the old record to update rec_old = get_record(rec_id) record_had_altered_bit = record_get_field_values(rec_old, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4], CFG_OAI_PROVENANCE_ALTERED_SUBFIELD) # Also save a copy to restore previous situation in case of errors original_record = get_record(rec_id) if original_record.has_key('005'): record_delete_field(original_record,'005') if rec_old is None: msg = " Failed during the creation of the old record!" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) else: write_message(" -Retrieve the old record to update: DONE", verbose=2) if rec_old.has_key('005'): record_delete_field(rec_old,'005') # In Replace mode, take over old strong tags if applicable: if opt_mode == 'replace' or \ opt_mode == 'replace_or_insert': copy_strong_tags_from_old_record(record, rec_old) # Delete tags to correct in the record if opt_mode == 'correct' or opt_mode == 'reference': delete_tags_to_correct(record, rec_old, opt_tag) write_message(" -Delete the old tags to correct in the old record: DONE", verbose=2) # Delete tags specified if in delete mode if opt_mode == 'delete': record = delete_tags(record, rec_old) write_message(" -Delete specified tags in the old record: DONE", verbose=2) # Append new tag to the old record and update the new record with the old_record modified if opt_mode == 'append' or opt_mode == 'correct' or \ opt_mode == 'reference': record = append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode) write_message(" -Append new tags to the old record: DONE", verbose=2) # 005 tag should be added everytime the record is modified # If an exiting record is modified, its 005 tag should be overwritten with a new revision value if record.has_key('005'): record_delete_field(record, '005') write_message(" Deleted the existing 005 tag.", verbose=2) error = record_add_field(record, '005', controlfield_value=now.strftime("%Y%m%d%H%M%S.0")) if error is None: write_message(" Failed: Error during adding to 005 controlfield to record",verbose=1,stream=sys.stderr) return (1, int(rec_id)) else: error=None write_message(" -Added tag 005: DONE. "+ str(record_get_field_value(record,'005','','')), verbose=2) # if record_had_altered_bit, this must be set to true, since the # record has been altered. 
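# (Illustrative note, not from the original code: the "altered" flag lives
# in the subfield named by CFG_OAI_PROVENANCE_ALTERED_SUBFIELD of the
# external OAI id provenance field; the loop below rewrites its value to
# 'true' on every instance of that field.)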
if record_had_altered_bit: oai_provenance_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) for oai_provenance_field in oai_provenance_fields: for i, (code, dummy_value) in enumerate(oai_provenance_field[0]): if code == CFG_OAI_PROVENANCE_ALTERED_SUBFIELD: oai_provenance_field[0][i] = (code, 'true') # now we clear all the rows from bibrec_bibxxx from the old # record (they will be populated later (if needed) during # stage 4 below): delete_bibrec_bibxxx(rec_old, rec_id, pretend=pretend) record_deleted_p = True write_message(" -Clean bibrec_bibxxx: DONE", verbose=2) write_message(" -Stage COMPLETED", verbose=2) try: if not record_is_valid(record): msg = "ERROR: record is not valid" write_message(msg, verbose=1, stream=sys.stderr) return (1, -1, msg) # Have a look if we have FMT tags we_have_fmt_tags_p = extract_tag_from_record(record, 'FMT') is not None write_message("Stage 1: Start (Insert of FMT tags if exist).", verbose=2) if opt_stage_to_start_from <= 1 and we_have_fmt_tags_p: record = insert_fmt_tags(record, rec_id, opt_mode, pretend=pretend) if record is None: msg = " Stage 1 failed: Error while inserting FMT tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) elif record == 0: # Mode format finished stat['nb_records_updated'] += 1 return (0, int(rec_id), "") write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Have a look if we have FFT tags write_message("Stage 2: Start (Process FFT tags if exist).", verbose=2) record_had_FFT = False if opt_stage_to_start_from <= 2 and \ extract_tag_from_record(record, 'FFT') is not None: record_had_FFT = True if not writing_rights_p(): write_message(" Stage 2 failed: Error no rights to write fulltext files", verbose=1, stream=sys.stderr) task_update_status("ERROR") sys.exit(1) try: record = elaborate_fft_tags(record, rec_id, opt_mode, pretend=pretend) except Exception, e: register_exception() msg = " Stage 2 failed: Error while elaborating FFT tags: %s" % e write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if record is None: msg = " Stage 2 failed: Error while elaborating FFT tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Have a look if we have FFT tags write_message("Stage 2B: Start (Synchronize 8564 tags).", verbose=2) has_bibdocs = run_sql("SELECT count(id_bibdoc) FROM bibrec_bibdoc JOIN bibdoc ON id_bibdoc=id WHERE id_bibrec=%s AND status<>'DELETED'", (rec_id, ))[0][0] > 0 if opt_stage_to_start_from <= 2 and (has_bibdocs or record_had_FFT or extract_tag_from_record(record, '856') is not None): try: record = synchronize_8564(rec_id, record, record_had_FFT, pretend=pretend) except Exception, e: register_exception(alert_admin=True) msg = " Stage 2B failed: Error while synchronizing 8564 tags: %s" % e write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) if record is None: msg = " Stage 2B failed: Error while synchronizing 8564 tags" write_message(msg, verbose=1, stream=sys.stderr) return (1, int(rec_id), msg) write_message(" -Stage COMPLETED", verbose=2) else: write_message(" -Stage NOT NEEDED", verbose=2) # Update of the BibFmt write_message("Stage 3: Start (Update bibfmt).", verbose=2) if opt_stage_to_start_from <= 3: # format the single record as xml rec_xml_new = 
record_xml_output(record)
            # Update bibfmt with the format xm of this record
            if opt_mode != 'format':
                modification_date = time.strftime('%Y-%m-%d %H:%M:%S',
                    time.strptime(record_get_field_value(record, '005'), '%Y%m%d%H%M%S.0'))
                error = update_bibfmt_format(rec_id, rec_xml_new, 'xm',
                    modification_date, pretend=pretend)
                if error == 1:
                    msg = "   Failed: error during update_bibfmt_format 'xm'"
                    write_message(msg, verbose=1, stream=sys.stderr)
                    return (1, int(rec_id), msg)
                if CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE:
                    error = update_bibfmt_format(rec_id, marshal.dumps(record),
                        'recstruct', modification_date, pretend=pretend)
                    if error == 1:
                        msg = "   Failed: error during update_bibfmt_format 'recstruct'"
                        write_message(msg, verbose=1, stream=sys.stderr)
                        return (1, int(rec_id), msg)
                if not we_have_fmt_tags_p:
                    # delete some formats like HB upon record change:
                    for format_to_delete in CFG_BIBUPLOAD_DELETE_FORMATS:
                        try:
                            delete_bibfmt_format(rec_id, format_to_delete,
                                pretend=pretend)
                        except:
                            # OK, some formats like HB could not have been
                            # deleted, no big deal
                            pass
            # archive MARCXML format of this record for version history purposes:
            error = archive_marcxml_for_history(rec_id, pretend=pretend)
            if error == 1:
                msg = "   Failed to archive MARCXML for history"
                write_message(msg, verbose=1, stream=sys.stderr)
                return (1, int(rec_id), msg)
            else:
                write_message("   -Archived MARCXML for history : DONE", verbose=2)
            write_message("   -Stage COMPLETED", verbose=2)

        # Update the database MetaData
        write_message("Stage 4: Start (Update the database with the metadata).",
            verbose=2)
        if opt_stage_to_start_from <= 4:
            if opt_mode in ('insert', 'replace', 'replace_or_insert',
                'append', 'correct', 'reference', 'delete'):
                update_database_with_metadata(record, rec_id, oai_rec_id,
                    pretend=pretend)
                record_deleted_p = False
            else:
                write_message("   -Stage NOT NEEDED in mode %s" % opt_mode,
                    verbose=2)
            write_message("   -Stage COMPLETED", verbose=2)
        else:
            write_message("   -Stage NOT NEEDED", verbose=2)

        # Finally we update the bibrec table with the current date
        write_message("Stage 5: Start (Update bibrec table with current date).",
            verbose=2)
        if opt_stage_to_start_from <= 5 and \
           opt_notimechange == 0 and \
           not insert_mode_p:
            write_message("   -Retrieved current localtime: DONE", verbose=2)
            update_bibrec_modif_date(now.strftime("%Y-%m-%d %H:%M:%S"),
                rec_id, pretend=pretend)
            write_message("   -Stage COMPLETED", verbose=2)
        else:
            write_message("   -Stage NOT NEEDED", verbose=2)

        # Increase statistics
        if insert_mode_p:
            stat['nb_records_inserted'] += 1
        else:
            stat['nb_records_updated'] += 1

        # Upload of this record finished
        write_message("Record "+str(rec_id)+" DONE", verbose=1)
        return (0, int(rec_id), "")
    finally:
        if record_deleted_p:
            ## BibUpload has failed, leaving the record deleted. We should
            ## restore the original record then.
            update_database_with_metadata(original_record, rec_id, oai_rec_id,
                pretend=pretend)
            write_message("   Restored original record", verbose=1,
                stream=sys.stderr)

def record_is_valid(record):
    """
    Check if the record is valid. Currently this simply checks if the
    record has exactly one rec_id.
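    For instance (illustrative values): a record whose MARCXML carries two
    001 controlfields, so that record_get_field_values(record, tag="001")
    returns ['98', '99'], is reported as not valid.
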
    @param record: the record
    @type record: recstruct
    @return: True if the record is valid
    @rtype: bool
    """
    rec_ids = record_get_field_values(record, tag="001")
    if len(rec_ids) != 1:
        write_message("   The record is not valid: it does not have a single rec_id: %s" % (rec_ids),
            stream=sys.stderr)
        return False
    return True

def find_record_ids_by_oai_id(oaiId):
    """
    Find record identifiers from a given OAI identifier.
    Returns an intbitset of record identifiers matching that OAI identifier.
    """
    # Is this record already in invenio (matching by oaiid)
    if oaiId:
        recids = search_pattern(p=oaiId, f=CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, m='e')

        # Is this record already in invenio (matching by reportnumber i.e.
        # particularly 037. Idea: to avoid duplicate insertions)
        repnumber = oaiId.split(":")[-1]
        if repnumber:
            recids |= search_pattern(p=repnumber, f="reportnumber", m='e')

        # Is this record already in invenio (matching by reportnumber i.e.
        # particularly 037. Idea: to avoid duplicate insertions)
        repnumber = "arXiv:" + oaiId.split(":")[-1]
        recids |= search_pattern(p=repnumber, f="reportnumber", m='e')

        return recids
    else:
        return intbitset()

def insert_record_into_holding_pen(record, oai_id, pretend=False):
    query = "INSERT INTO bibHOLDINGPEN (oai_id, changeset_date, changeset_xml, id_bibrec) VALUES (%s, NOW(), %s, %s)"
    xml_record = record_xml_output(record)
    bibrec_ids = find_record_ids_by_oai_id(oai_id)
    # here determining the identifier of the record
    if len(bibrec_ids) > 0:
        bibrec_id = bibrec_ids.pop()
    else:
        # id not found by using the oai_id, let's use a wider search based
        # on any information we might have.
        bibrec_id = retrieve_rec_id(record, 'holdingpen', pretend=pretend)
        if bibrec_id is None:
            bibrec_id = 0
    if not pretend:
        run_sql(query, (oai_id, xml_record, bibrec_id))

    # record_id is logged as 0! (We are not inserting into the main database.)
    log_record_uploading(oai_id, task_get_task_param('task_id', 0), 0, 'H',
        pretend=pretend)
    stat['nb_holdingpen'] += 1

def print_out_bibupload_statistics():
    """Print the statistics of the process"""
    out = "Task stats: %(nb_input)d input records, %(nb_updated)d updated, " \
          "%(nb_inserted)d inserted, %(nb_errors)d errors, %(nb_holdingpen)d inserted to holding pen. " \
          "Time %(nb_sec).2f sec."
% { \ 'nb_input': stat['nb_records_to_upload'], 'nb_updated': stat['nb_records_updated'], 'nb_inserted': stat['nb_records_inserted'], 'nb_errors': stat['nb_errors'], 'nb_holdingpen': stat['nb_holdingpen'], 'nb_sec': time.time() - time.mktime(stat['exectime']) } write_message(out) def open_marc_file(path): """Open a file and return the data""" try: # open the file containing the marc document marc_file = open(path,'r') marc = marc_file.read() marc_file.close() except IOError, erro: write_message("Error: %s" % erro, verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) if erro.errno == 2: # No such file or directory # Not scary task_update_status("CERROR") else: task_update_status("ERROR") sys.exit(1) return marc def xml_marc_to_records(xml_marc): """create the records""" # Creation of the records from the xml Marc in argument recs = create_records(xml_marc, 1, 1) if recs == []: write_message("Error: Cannot parse MARCXML file.", verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("ERROR") sys.exit(1) elif recs[0][0] is None: write_message("Error: MARCXML file has wrong format: %s" % recs, verbose=1, stream=sys.stderr) write_message("Exiting.", sys.stderr) task_update_status("CERROR") sys.exit(1) else: recs = map((lambda x:x[0]), recs) return recs def find_record_format(rec_id, format): """Look whether record REC_ID is formatted in FORMAT, i.e. whether FORMAT exists in the bibfmt table for this record. Return the number of times it is formatted: 0 if not, 1 if yes, 2 if found more than once (should never occur). """ out = 0 query = """SELECT COUNT(id) FROM bibfmt WHERE id_bibrec=%s AND format=%s""" params = (rec_id, format) res = [] try: res = run_sql(query, params) out = res[0][0] except Error, error: write_message(" Error during find_record_format() : %s " % error, verbose=1, stream=sys.stderr) return out def find_record_from_recid(rec_id): """ Try to find record in the database from the REC_ID number. Return record ID if found, None otherwise. """ try: res = run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id,)) except Error, error: write_message(" Error during find_record_bibrec() : %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def find_record_from_sysno(sysno): """ Try to find record in the database from the external SYSNO number. Return record ID if found, None otherwise. """ bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, sysno,)) except Error, error: write_message(" Error during find_record_from_sysno(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def find_records_from_extoaiid(extoaiid, extoaisrc=None): """ Try to find records in the database from the external EXTOAIID number. Return list of record ID if found, None otherwise. 
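    Example (illustrative identifiers only):
    find_records_from_extoaiid('oai:arXiv.org:1234.5678', 'arXiv') returns
    the intbitset of the records carrying that external OAI id with
    provenance 'arXiv'.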
""" assert(CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5] == CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[:5]) bibxxx = 'bib'+CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: write_message(' Looking for extoaiid="%s" with extoaisrc="%s"' % (extoaiid, extoaisrc), verbose=9) id_bibrecs = intbitset(run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, extoaiid,))) write_message(' Partially found %s for extoaiid="%s"' % (id_bibrecs, extoaiid), verbose=9) ret = intbitset() for id_bibrec in id_bibrecs: record = get_record(id_bibrec) instances = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4]) write_message(' recid %s -> instances "%s"' % (id_bibrec, instances), verbose=9) for instance in instances: this_extoaisrc = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5]) this_extoaisrc = this_extoaisrc and this_extoaisrc[0] or None this_extoaiid = field_get_subfield_values(instance, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5]) this_extoaiid = this_extoaiid and this_extoaiid[0] or None write_message(" this_extoaisrc -> %s, this_extoaiid -> %s" % (this_extoaisrc, this_extoaiid), verbose=9) if this_extoaiid == extoaiid: write_message(' recid %s -> provenance "%s"' % (id_bibrec, this_extoaisrc), verbose=9) if this_extoaisrc == extoaisrc: write_message('Found recid %s for extoaiid="%s" with provenance="%s"' % (id_bibrec, extoaiid, extoaisrc), verbose=9) ret.add(id_bibrec) break if this_extoaisrc is None: write_message('WARNING: Found recid %s for extoaiid="%s" that doesn\'t specify any provenance, while input record does.' % (id_bibrec, extoaiid), stream=sys.stderr) if extoaisrc is None: write_message('WARNING: Found recid %s for extoaiid="%s" that specify a provenance (%s), while input record does not have a provenance.' % (id_bibrec, extoaiid, this_extoaisrc), stream=sys.stderr) return ret except Error, error: write_message(" Error during find_records_from_extoaiid(): %s " % error, verbose=1, stream=sys.stderr) raise def find_record_from_oaiid(oaiid): """ Try to find record in the database from the OAI ID number and OAI SRC. Return record ID if found, None otherwise. """ bibxxx = 'bib'+CFG_OAI_ID_FIELD[0:2]+'x' bibrec_bibxxx = 'bibrec_' + bibxxx try: res = run_sql("""SELECT bb.id_bibrec FROM %(bibrec_bibxxx)s AS bb, %(bibxxx)s AS b WHERE b.tag=%%s AND b.value=%%s AND bb.id_bibxxx=b.id""" % \ {'bibxxx': bibxxx, 'bibrec_bibxxx': bibrec_bibxxx}, (CFG_OAI_ID_FIELD, oaiid,)) except Error, error: write_message(" Error during find_record_from_oaiid(): %s " % error, verbose=1, stream=sys.stderr) if res: return res[0][0] else: return None def extract_tag_from_record(record, tag_number): """ Extract the tag_number for record.""" # first step verify if the record is not already in the database if record: return record.get(tag_number, None) return None def retrieve_rec_id(record, opt_mode, pretend=False): """Retrieve the record Id from a record by using tag 001 or SYSNO or OAI ID tag. 
    opt_mode is the desired mode."""
    rec_id = None

    # 1st step: we look for the tag 001
    tag_001 = extract_tag_from_record(record, '001')
    if tag_001 is not None:
        # We extract the record ID from the tag
        rec_id = tag_001[0][3]
        # if we are in insert mode => error
        if opt_mode == 'insert':
            write_message("   Failed: tag 001 found in the xml" \
                          " submitted, you should use the option replace," \
                          " correct or append to replace an existing" \
                          " record. (-h for help)",
                          verbose=1, stream=sys.stderr)
            return -1
        else:
            # we found the rec id and we are not in insert mode => continue
            # we try to match rec_id against the database:
            if find_record_from_recid(rec_id) is not None:
                # okay, 001 corresponds to some known record
                return int(rec_id)
            elif opt_mode in ('replace', 'replace_or_insert'):
                if task_get_option('force'):
                    # we found the rec_id but it's not in the system and we
                    # are requested to replace records. Therefore we create
                    # on the fly an empty record allocating the recid.
                    write_message("   Warning: tag 001 found in the xml with"
                                  " value %(rec_id)s, but rec_id %(rec_id)s does"
                                  " not exist. Since the mode replace was"
                                  " requested the rec_id %(rec_id)s is allocated"
                                  " on-the-fly." % {"rec_id": rec_id},
                                  stream=sys.stderr)
                    return create_new_record(rec_id=rec_id, pretend=pretend)
                else:
                    # Since --force was not used we are going to raise an error
                    write_message("   Failed: tag 001 found in the xml"
                                  " submitted with value %(rec_id)s. The"
                                  " corresponding record however does not"
                                  " exist. If you really want to create"
                                  " such a record, please use the --force"
                                  " parameter when calling bibupload." % {
                                    "rec_id": rec_id}, stream=sys.stderr)
                    return -1
            else:
                # The record doesn't exist yet. We shall try to check
                # the SYSNO or OAI id later.
                write_message("   -Tag 001 value not found in database.",
                              verbose=9)
                rec_id = None
    else:
        write_message("   -Tag 001 not found in the xml marc file.", verbose=9)

    if rec_id is None:
        # 2nd step: we look for the SYSNO
        sysnos = record_get_field_values(record,
            CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3],
            CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \
            CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or "",
            CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \
            CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or "",
            CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6])
        if sysnos:
            sysno = sysnos[0] # there should be only one external SYSNO
            write_message("   -Checking if SYSNO " + sysno + \
                          " exists in the database", verbose=9)
            # try to find the corresponding rec id from the database
            rec_id = find_record_from_sysno(sysno)
            if rec_id is not None:
                # rec_id found
                pass
            else:
                # The record doesn't exist yet. We will try to check
                # external and internal OAI ids later.
write_message(" -Tag SYSNO value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) if rec_id is None: # 2nd step we look for the external OAIID extoai_fields = record_get_field_instances(record, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or "", CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or "") if extoai_fields: for field in extoai_fields: extoaiid = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6]) extoaisrc = field_get_subfield_values(field, CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6]) if extoaiid: extoaiid = extoaiid[0] if extoaisrc: extoaisrc = extoaisrc[0] else: extoaisrc = None write_message(" -Checking if EXTOAIID %s (%s) exists in the database" % (extoaiid, extoaisrc), verbose=9) # try to find the corresponding rec id from the database try: rec_ids = find_records_from_extoaiid(extoaiid, extoaisrc) except Error, e: write_message(e, verbose=1, stream=sys.stderr) return -1 if rec_ids: # rec_id found rec_id = rec_ids.pop() break else: # The record doesn't exist yet. We will try to check # OAI id later. write_message(" -Tag EXTOAIID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag EXTOAIID not found in the xml marc file.", verbose=9) if rec_id is None: # 4th step we look for the OAI ID oaiidvalues = record_get_field_values(record, CFG_OAI_ID_FIELD[0:3], CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or "", CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or "", CFG_OAI_ID_FIELD[5:6]) if oaiidvalues: oaiid = oaiidvalues[0] # there should be only one OAI ID write_message(" -Check if local OAI ID " + oaiid + \ " exist in the database", verbose=9) # try to find the corresponding rec id from the database rec_id = find_record_from_oaiid(oaiid) if rec_id is not None: # rec_id found pass else: write_message(" -Tag OAI ID value not found in database.", verbose=9) rec_id = None else: write_message(" -Tag SYSNO not found in the xml marc file.", verbose=9) # Now we should have detected rec_id from SYSNO or OAIID # tags. (None otherwise.) if rec_id: if opt_mode == 'insert': write_message(" Failed : Record found in the database," \ " you should use the option replace," \ " correct or append to replace an existing" \ " record. (-h for help)", verbose=1, stream=sys.stderr) return -1 else: if opt_mode != 'insert' and \ opt_mode != 'replace_or_insert': write_message(" Failed : Record not found in the database."\ " Please insert the file before updating it."\ " (-h for help)", verbose=1, stream=sys.stderr) return -1 return rec_id and int(rec_id) or None ### Insert functions def create_new_record(rec_id=None, pretend=False): """ Create new record in the database @param rec_id: if specified the new record will have this rec_id. @type rec_id: int @return: the allocated rec_id @rtype: int @note: in case of errors will be returned None """ if rec_id is not None: try: rec_id = int(rec_id) except (ValueError, TypeError), error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None if run_sql("SELECT id FROM bibrec WHERE id=%s", (rec_id, )): write_message(" Error during the creation_new_record function : the requested rec_id %s already exists." 
% rec_id) return None if pretend: if rec_id: return rec_id else: return run_sql("SELECT max(id)+1 FROM bibrec")[0][0] try: if rec_id is not None: return run_sql("INSERT INTO bibrec (id, creation_date, modification_date) VALUES (%s, NOW(), NOW())", (rec_id, )) else: return run_sql("INSERT INTO bibrec (creation_date, modification_date) VALUES (NOW(), NOW())") except Error, error: write_message(" Error during the creation_new_record function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_bibfmt(id_bibrec, marc, format, modification_date='1970-01-01 00:00:00', pretend=False): """Insert the format in the table bibfmt""" # compress the marc value pickled_marc = compress(marc) try: time.strptime(modification_date, "%Y-%m-%d %H:%M:%S") except ValueError: modification_date = '1970-01-01 00:00:00' query = """INSERT INTO bibfmt (id_bibrec, format, last_updated, value) VALUES (%s, %s, %s, %s)""" try: if not pretend: row_id = run_sql(query, (id_bibrec, format, modification_date, pickled_marc)) return row_id else: return 1 except Error, error: write_message(" Error during the insert_bibfmt function : %s " % error, verbose=1, stream=sys.stderr) return None def insert_record_bibxxx(tag, value, pretend=False): """Insert the record into bibxxx""" # determine into which table one should insert the record table_name = 'bib'+tag[0:2]+'x' # check if the tag, value combination exists in the table query = """SELECT id,value FROM %s """ % table_name query += """ WHERE tag=%s AND value=%s""" params = (tag, value) try: res = run_sql(query, params) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) # Note: compare now the found values one by one and look for # string binary equality (e.g. to respect lowercase/uppercase # match), regardless of the charset etc settings. Ideally we # could use a BINARY operator in the above SELECT statement, but # we would have to check compatibility on various MySQLdb versions # etc; this approach checks all matched values in Python, not in # MySQL, which is less cool, but more conservative, so it should # work better on most setups. for row in res: row_id = row[0] row_value = row[1] if row_value == value: return (table_name, row_id) # We got here only when the tag,value combination was not found, # so it is now necessary to insert the tag,value combination into # bibxxx table as new. 
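# Illustrative example of the case handled above (hypothetical values):
# with a case-insensitive collation, SELECT ... WHERE value='Ellis' may
# also return a row whose value is 'ELLIS'; the Python-side test
# (row_value == value) rejects such a row, so the case-differing value
# is inserted below as a genuinely new bibxxx row.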
query = """INSERT INTO %s """ % table_name query += """ (tag, value) values (%s , %s)""" params = (tag, value) try: if not pretend: row_id = run_sql(query, params) else: return (table_name, 1) except Error, error: write_message(" Error during the insert_record_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) return (table_name, row_id) def insert_record_bibrec_bibxxx(table_name, id_bibxxx, field_number, id_bibrec, pretend=False): """Insert the record into bibrec_bibxxx""" # determine into which table one should insert the record full_table_name = 'bibrec_'+ table_name # insert the proper row into the table query = """INSERT INTO %s """ % full_table_name query += """(id_bibrec,id_bibxxx, field_number) values (%s , %s, %s)""" params = (id_bibrec, id_bibxxx, field_number) try: if not pretend: res = run_sql(query, params) else: return 1 except Error, error: write_message(" Error during the insert_record_bibrec_bibxxx" " function 2nd query : %s " % error, verbose=1, stream=sys.stderr) return res def synchronize_8564(rec_id, record, record_had_FFT, pretend=False): """ Synchronize 8564_ tags and BibDocFile tables. This function directly manipulate the record parameter. @type rec_id: positive integer @param rec_id: the record identifier. @param record: the record structure as created by bibrecord.create_record @type record_had_FFT: boolean @param record_had_FFT: True if the incoming bibuploaded-record used FFT @return: the manipulated record (which is also modified as a side effect) """ def merge_marc_into_bibdocfile(field, pretend=False): """ Internal function that reads a single field and store its content in BibDocFile tables. @param field: the 8564_ field containing a BibDocFile URL. """ write_message('Merging field: %s' % (field, ), verbose=9) url = field_get_subfield_values(field, 'u')[:1] or field_get_subfield_values(field, 'q')[:1] description = field_get_subfield_values(field, 'y')[:1] comment = field_get_subfield_values(field, 'z')[:1] if url: recid, docname, format = decompose_bibdocfile_url(url[0]) if recid != rec_id: write_message("INFO: URL %s is not pointing to a fulltext owned by this record (%s)" % (url, recid), stream=sys.stderr) else: try: bibdoc = BibRecDocs(recid).get_bibdoc(docname) if description and not pretend: bibdoc.set_description(description[0], format) if comment and not pretend: bibdoc.set_comment(comment[0], format) except InvenioWebSubmitFileError: ## Apparently the referenced docname doesn't exist anymore. ## Too bad. Let's skip it. write_message("WARNING: docname %s does not seem to exist for record %s. Has it been renamed outside FFT?" % (docname, recid), stream=sys.stderr) def merge_bibdocfile_into_marc(field, subfields): """ Internal function that reads BibDocFile table entries referenced by the URL in the given 8564_ field and integrate the given information directly with the provided subfields. @param field: the 8564_ field containing a BibDocFile URL. @param subfields: the subfields corresponding to the BibDocFile URL generated after BibDocFile tables. """ write_message('Merging subfields %s into field %s' % (subfields, field), verbose=9) subfields = dict(subfields) ## We make a copy not to have side-effects subfield_to_delete = [] for subfield_position, (code, value) in enumerate(field_get_subfield_instances(field)): ## For each subfield instance already existing... 
            if code in subfields:
                ## ...We substitute it with what is in BibDocFile tables
                record_modify_subfield(record, '856', code, subfields[code],
                    subfield_position, field_position_global=field[4])
                del subfields[code]
            else:
                ## ...We delete it otherwise
                subfield_to_delete.append(subfield_position)

        subfield_to_delete.sort()

        for counter, position in enumerate(subfield_to_delete):
            ## FIXME: Very hackish algorithm. Since deleting a subfield
            ## will alter the position of the following subfields, we
            ## are taking note of this and adjusting further positions
            ## by using a counter.
            record_delete_subfield_from(record, '856', position - counter,
                field_position_global=field[4])

        subfields = subfields.items()
        subfields.sort()
        for code, value in subfields:
            ## Let's add non-previously existing subfields
            record_add_subfield_into(record, '856', code, value,
                field_position_global=field[4])

    def get_bibdocfile_managed_info():
        """
        Internal function that returns a dictionary of
        BibDocFile URL -> wanna-be subfields.

        @rtype: mapping
        @return: BibDocFile URL -> wanna-be subfields dictionary
        """
        ret = {}
        bibrecdocs = BibRecDocs(rec_id)
        latest_files = bibrecdocs.list_latest_files(list_hidden=False)
        for afile in latest_files:
            url = afile.get_url()
            ret[url] = {'u': url}
            description = afile.get_description()
            comment = afile.get_comment()
            subformat = afile.get_subformat()
            if description:
                ret[url]['y'] = description
            if comment:
                ret[url]['z'] = comment
            if subformat:
                ret[url]['x'] = subformat
        return ret

    write_message("Synchronizing MARC of recid '%s' with:\n%s" % (rec_id, record),
        verbose=9)
    tags856s = record_get_field_instances(record, '856', '%', '%')
    write_message("Original 856%% instances: %s" % tags856s, verbose=9)
    tags8564s_to_add = get_bibdocfile_managed_info()
    write_message("BibDocFile instances: %s" % tags8564s_to_add, verbose=9)
    positions_tags8564s_to_remove = []

    for local_position, field in enumerate(tags856s):
        if field[1] == '4' and field[2] == ' ':
            write_message('Analysing %s' % (field, ), verbose=9)
            for url in field_get_subfield_values(field, 'u') + field_get_subfield_values(field, 'q'):
                if url in tags8564s_to_add:
                    if record_had_FFT:
                        merge_bibdocfile_into_marc(field, tags8564s_to_add[url])
                    else:
                        merge_marc_into_bibdocfile(field, pretend=pretend)
                    del tags8564s_to_add[url]
                    break
                elif bibdocfile_url_p(url) and decompose_bibdocfile_url(url)[0] == rec_id:
                    positions_tags8564s_to_remove.append(local_position)
                    write_message("%s to be deleted and re-synchronized" % (field, ),
                        verbose=9)
                    break

    record_delete_fields(record, '856', positions_tags8564s_to_remove)

    tags8564s_to_add = tags8564s_to_add.values()
    tags8564s_to_add.sort()
    for subfields in tags8564s_to_add:
        subfields = subfields.items()
        subfields.sort()
        record_add_field(record, '856', '4', ' ', subfields=subfields)

    write_message('Final record: %s' % record, verbose=9)
    return record

def elaborate_fft_tags(record, rec_id, mode, pretend=False):
    """
    Process FFT tags that should contain $a with file paths or URLs
    to get the fulltext from.  This function enriches the record with
    proper 8564 URL tags, downloads fulltext files and stores them
    into var/data structure where appropriate.

    CFG_BIBUPLOAD_WGET_SLEEP_TIME defines time to sleep in seconds in
    between URL downloads.

    Note: if an FFT tag contains multiple $a subfields, we upload them
    into different 856 URL tags in the metadata.  See regression test
    case test_multiple_fft_insert_via_http().
    """
    # Let's define some handy subprocedures.
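    ## Illustrative FFT field as consumed by the code below (all values
    ## hypothetical):
    ##
    ##   <datafield tag="FFT" ind1=" " ind2=" ">
    ##     <subfield code="a">http://example.org/fulltext.pdf</subfield>  ($a location)
    ##     <subfield code="t">Main</subfield>                             ($t doctype)
    ##     <subfield code="n">thesis</subfield>                           ($n docname)
    ##     <subfield code="f">.pdf</subfield>                             ($f format)
    ##     <subfield code="d">Fulltext</subfield>                         ($d description)
    ##     <subfield code="r">restricted</subfield>                       ($r restriction)
    ##   </datafield>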
def _add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=False): """Adds a new format for a given bibdoc. Returns True when everything's fine.""" write_message('Add new format to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment, flags), verbose=9) try: if not url: # Not requesting a new url. Just updating comment & description return _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=pretend) try: if not pretend: bibdoc.add_file_new_format(url, description=description, comment=comment, flags=flags) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because format already exists (%s)." % (url, format, docname, doctype, newname, description, comment, flags, e), stream=sys.stderr) raise except Exception, e: write_message("Error in adding '%s' as a new format because of: %s" % (url, e), stream=sys.stderr) raise return True def _add_new_version(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=False): """Adds a new version for a given bibdoc. Returns True when everything's fine.""" write_message('Add new version to %s url: %s, format: %s, docname: %s, doctype: %s, newname: %s, description: %s, comment: %s, flags: %s' % (repr(bibdoc), url, format, docname, doctype, newname, description, comment, flags)) try: if not url: return _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=pretend) try: if not pretend: bibdoc.add_file_new_version(url, description=description, comment=comment, flags=flags) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s') not inserted because '%s'." % (url, format, docname, doctype, newname, description, comment, flags, e), stream=sys.stderr) raise except Exception, e: write_message("Error in adding '%s' as a new version because of: %s" % (url, e), stream=sys.stderr) raise return True def _update_description_and_comment(bibdoc, docname, format, description, comment, flags, pretend=False): """Directly update comments and descriptions.""" write_message('Just updating description and comment for %s with format %s with description %s, comment %s and flags %s' % (docname, format, description, comment, flags), verbose=9) try: if not pretend: bibdoc.set_description(description, format) bibdoc.set_comment(comment, format) for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS: if flag in flags: bibdoc.set_flag(flag, format) else: bibdoc.unset_flag(flag, format) except StandardError, e: write_message("('%s', '%s', '%s', '%s', '%s') description and comment not updated because '%s'." % (docname, format, description, comment, flags, e)) raise return True if mode == 'delete': raise StandardError('FFT tag specified but bibupload executed in --delete mode') tuple_list = extract_tag_from_record(record, 'FFT') if tuple_list: # FFT Tags analysis write_message("FFTs: "+str(tuple_list), verbose=9) docs = {} # docnames and their data for fft in record_get_field_instances(record, 'FFT', ' ', ' '): # Let's discover the type of the document # This is a legacy field and will not be enforced any particular # check on it. doctype = field_get_subfield_values(fft, 't') if doctype: doctype = doctype[0] else: # Default is Main doctype = 'Main' # Let's discover the url. 
url = field_get_subfield_values(fft, 'a') if url: url = url[0] try: check_valid_url(url) except StandardError, e: raise StandardError, "fft '%s' specifies in $a a location ('%s') with problems: %s" % (fft, url, e) else: url = '' # Let's discover the description description = field_get_subfield_values(fft, 'd') if description != []: description = description[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## If the user require to correct, and do not specify ## a description this means she really want to ## modify the description. description = '' else: description = KEEP_OLD_VALUE # Let's discover the desired docname to be created/altered name = field_get_subfield_values(fft, 'n') if name: ## Let's remove undesired extensions name = file_strip_ext(name[0] + '.pdf') else: if url: name = get_docname_from_url(url) elif mode != 'correct' and doctype != 'FIX-MARC': raise StandardError, "Warning: fft '%s' doesn't specifies either a location in $a or a docname in $n" % str(fft) else: continue # Let's discover the desired new docname in case we want to change it newname = field_get_subfield_values(fft, 'm') if newname: newname = file_strip_ext(newname[0] + '.pdf') else: newname = name # Let's discover the desired format format = field_get_subfield_values(fft, 'f') if format: format = normalize_format(format[0]) else: if url: format = guess_format_from_url(url) else: format = "" # Let's discover the icon icon = field_get_subfield_values(fft, 'x') if icon != []: icon = icon[0] if icon != KEEP_OLD_VALUE: try: check_valid_url(icon) except StandardError, e: raise StandardError, "fft '%s' specifies in $x an icon ('%s') with problems: %s" % (fft, icon, e) else: icon = '' # Let's discover the comment comment = field_get_subfield_values(fft, 'z') if comment != []: comment = comment[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## See comment on description comment = '' else: comment = KEEP_OLD_VALUE # Let's discover the restriction restriction = field_get_subfield_values(fft, 'r') if restriction != []: restriction = restriction[0] else: if mode == 'correct' and doctype != 'FIX-MARC': ## See comment on description restriction = '' else: restriction = KEEP_OLD_VALUE version = field_get_subfield_values(fft, 'v') if version: version = version[0] else: version = '' flags = field_get_subfield_values(fft, 'o') for flag in flags: if flag not in CFG_BIBDOCFILE_AVAILABLE_FLAGS: raise StandardError, "fft '%s' specifies a non available flag: %s" % (fft, flag) if docs.has_key(name): # new format considered (doctype2, newname2, restriction2, version2, urls) = docs[name] if doctype2 != doctype: raise StandardError, "fft '%s' specifies a different doctype from previous fft with docname '%s'" % (str(fft), name) if newname2 != newname: raise StandardError, "fft '%s' specifies a different newname from previous fft with docname '%s'" % (str(fft), name) if restriction2 != restriction: raise StandardError, "fft '%s' specifies a different restriction from previous fft with docname '%s'" % (str(fft), name) if version2 != version: raise StandardError, "fft '%x' specifies a different version than the previous fft with docname '%s'" % (str(fft), name) for (url2, format2, description2, comment2, flags2) in urls: if format == format2: raise StandardError, "fft '%s' specifies a second file '%s' with the same format '%s' from previous fft with docname '%s'" % (str(fft), url, format, name) if url or format: urls.append((url, format, description, comment, flags)) if icon: urls.append((icon, 
icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags)) else: if url or format: docs[name] = (doctype, newname, restriction, version, [(url, format, description, comment, flags)]) if icon: docs[name][4].append((icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags)) elif icon: docs[name] = (doctype, newname, restriction, version, [(icon, icon[len(file_strip_ext(icon)):] + ';icon', description, comment, flags)]) else: docs[name] = (doctype, newname, restriction, version, []) write_message('Result of FFT analysis:\n\tDocs: %s' % (docs,), verbose=9) # Let's remove all FFT tags record_delete_field(record, 'FFT', ' ', ' ') # Preprocessed data elaboration bibrecdocs = BibRecDocs(rec_id) ## Let's pre-download all the URLs to see if, in case of mode 'correct' or 'append' ## we can avoid creating a new revision. for docname, (doctype, newname, restriction, version, urls) in docs.items(): downloaded_urls = [] try: bibdoc = bibrecdocs.get_bibdoc(docname) except InvenioWebSubmitFileError: ## A bibdoc with the given docname does not exists. ## So there is no chance we are going to revise an existing ## format with an identical file :-) bibdoc = None new_revision_needed = False for url, format, description, comment, flags in urls: if url: try: downloaded_url = download_url(url, format) write_message("%s saved into %s" % (url, downloaded_url), verbose=9) except Exception, err: write_message("Error in downloading '%s' because of: %s" % (url, err), stream=sys.stderr) raise if mode == 'correct' and bibdoc is not None and not new_revision_needed: downloaded_urls.append((downloaded_url, format, description, comment, flags)) if not bibdoc.check_file_exists(downloaded_url): new_revision_needed = True else: write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr) elif mode == 'append' and bibdoc is not None: if not bibdoc.check_file_exists(downloaded_url): downloaded_urls.append((downloaded_url, format, description, comment, flags)) else: write_message("WARNING: %s is already attached to bibdoc %s for recid %s" % (url, docname, rec_id), stream=sys.stderr) else: downloaded_urls.append((downloaded_url, format, description, comment, flags)) else: downloaded_urls.append(('', format, description, comment, flags)) if mode == 'correct' and bibdoc is not None and not new_revision_needed: ## Since we don't need a new revision (because all the files ## that are being uploaded are different) ## we can simply remove the urls but keep the other information write_message("No need to add a new revision for docname %s for recid %s" % (docname, rec_id), verbose=2) docs[docname] = (doctype, newname, restriction, version, [('', format, description, comment, flags) for (dummy, format, description, comment, flags) in downloaded_urls]) for downloaded_url, dummy, dummy, dummy, dummy in downloaded_urls: ## Let's free up some space :-) if downloaded_url and os.path.exists(downloaded_url): os.remove(downloaded_url) else: if downloaded_urls or mode != 'append': docs[docname] = (doctype, newname, restriction, version, downloaded_urls) else: ## In case we are in append mode and there are no urls to append ## we discard the whole FFT del docs[docname] if mode == 'replace': # First we erase previous bibdocs if not pretend: for bibdoc in bibrecdocs.list_bibdocs(): bibdoc.delete() bibrecdocs.build_bibdoc_list() for docname, (doctype, newname, restriction, version, urls) in docs.iteritems(): write_message("Elaborating olddocname: '%s', 
newdocname: '%s', doctype: '%s', restriction: '%s', urls: '%s', mode: '%s'" % (docname, newname, doctype, restriction, urls, mode), verbose=9) if mode in ('insert', 'replace'): # new bibdocs, new docnames, new marc if newname in bibrecdocs.get_bibdoc_names(): write_message("('%s', '%s') not inserted because docname already exists." % (newname, urls), stream=sys.stderr) raise StandardError try: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) else: bibdoc = None except Exception, e: write_message("('%s', '%s', '%s') not inserted because: '%s'." % (doctype, newname, urls, e), stream=sys.stderr) raise StandardError for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) elif mode == 'replace_or_insert': # to be thought as correct_or_insert for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'): if newname != docname: try: if not pretend: bibdoc.change_name(newname) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() except StandardError, e: write_message(e, stream=sys.stderr) raise found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == newname: found_bibdoc = True if doctype == 'PURGE': if not pretend: bibdoc.purge() elif doctype == 'DELETE': if not pretend: bibdoc.delete() elif doctype == 'EXPUNGE': if not pretend: bibdoc.expunge() elif doctype == 'FIX-ALL': if not pretend: bibrecdocs.fix(docname) elif doctype == 'FIX-MARC': pass elif doctype == 'DELETE-FILE': if urls: for (url, format, description, comment, flags) in urls: if not pretend: bibdoc.delete_file(format, version) elif doctype == 'REVERT': try: if not pretend: bibdoc.revert(version) except Exception, e: write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr) raise else: if restriction != KEEP_OLD_VALUE: if not pretend: bibdoc.set_status(restriction) # Since the docname already existed we have to first # bump the version by pushing the first new file # then pushing the other files. if urls: (first_url, first_format, first_description, first_comment, first_flags) = urls[0] other_urls = urls[1:] assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, pretend=pretend)) for (url, format, description, comment, flags) in other_urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() if not found_bibdoc: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags)) elif mode == 'correct': for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: if doctype not in ('PURGE', 'DELETE', 'EXPUNGE', 'REVERT', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE'): if newname != docname: try: if not pretend: bibdoc.change_name(newname) ## Let's refresh the list of bibdocs. 
bibrecdocs.build_bibdoc_list() except StandardError, e: write_message('Error in renaming %s to %s: %s' % (docname, newname, e), stream=sys.stderr) raise found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == newname: found_bibdoc = True if doctype == 'PURGE': if not pretend: bibdoc.purge() elif doctype == 'DELETE': if not pretend: bibdoc.delete() elif doctype == 'EXPUNGE': if not pretend: bibdoc.expunge() elif doctype == 'FIX-ALL': if not pretend: bibrecdocs.fix(newname) elif doctype == 'FIX-MARC': pass elif doctype == 'DELETE-FILE': if urls: for (url, format, description, comment, flags) in urls: if not pretend: bibdoc.delete_file(format, version) elif doctype == 'REVERT': try: if not pretend: bibdoc.revert(version) except Exception, e: write_message('(%s, %s) not correctly reverted: %s' % (newname, version, e), stream=sys.stderr) raise else: if restriction != KEEP_OLD_VALUE: if not pretend: bibdoc.set_status(restriction) if urls: (first_url, first_format, first_description, first_comment, first_flags) = urls[0] other_urls = urls[1:] assert(_add_new_version(bibdoc, first_url, first_format, docname, doctype, newname, first_description, first_comment, first_flags, pretend=pretend)) for (url, format, description, comment, flags) in other_urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) ## Let's refresh the list of bibdocs. bibrecdocs.build_bibdoc_list() if not found_bibdoc: if doctype in ('PURGE', 'DELETE', 'EXPUNGE', 'FIX-ALL', 'FIX-MARC', 'DELETE-FILE', 'REVERT'): write_message("('%s', '%s', '%s') not performed because '%s' docname didn't existed." % (doctype, newname, urls, docname), stream=sys.stderr) raise StandardError else: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, newname) bibdoc.set_status(restriction) for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags)) elif mode == 'append': try: found_bibdoc = False for bibdoc in bibrecdocs.list_bibdocs(): if bibdoc.get_docname() == docname: found_bibdoc = True for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags, pretend=pretend)) if not found_bibdoc: try: if not pretend: bibdoc = bibrecdocs.add_bibdoc(doctype, docname) bibdoc.set_status(restriction) for (url, format, description, comment, flags) in urls: assert(_add_new_format(bibdoc, url, format, docname, doctype, newname, description, comment, flags)) except Exception, e: register_exception() write_message("('%s', '%s', '%s') not appended because: '%s'." 
% (doctype, newname, urls, e), stream=sys.stderr) raise except: register_exception() raise return record def insert_fmt_tags(record, rec_id, opt_mode, pretend=False): """Process and insert FMT tags""" fmt_fields = record_get_field_instances(record, 'FMT') if fmt_fields: for fmt_field in fmt_fields: # Get the d, f, g subfields of the FMT tag try: d_value = field_get_subfield_values(fmt_field, "d")[0] except IndexError: d_value = "" try: f_value = field_get_subfield_values(fmt_field, "f")[0] except IndexError: f_value = "" try: g_value = field_get_subfield_values(fmt_field, "g")[0] except IndexError: g_value = "" # Update the format if not pretend: res = update_bibfmt_format(rec_id, g_value, f_value, d_value, pretend=pretend) if res == 1: write_message(" Failed: Error during update_bibfmt", verbose=1, stream=sys.stderr) # If we are in format mode, we only care about the FMT tag if opt_mode == 'format': return 0 # We delete the FMT Tag of the record record_delete_field(record, 'FMT') write_message(" -Delete field FMT from record : DONE", verbose=2) return record elif opt_mode == 'format': write_message(" Failed: Format updated failed : No tag FMT found", verbose=1, stream=sys.stderr) return None else: return record ### Update functions def update_bibrec_modif_date(now, bibrec_id, pretend=False): """Update the date of the record in bibrec table """ query = """UPDATE bibrec SET modification_date=%s WHERE id=%s""" params = (now, bibrec_id) try: if not pretend: run_sql(query, params) write_message(" -Update record modification date : DONE" , verbose=2) except Error, error: write_message(" Error during update_bibrec_modif_date function : %s" % error, verbose=1, stream=sys.stderr) def update_bibfmt_format(id_bibrec, format_value, format_name, modification_date=None, pretend=False): """Update the format in the table bibfmt""" if modification_date is None: modification_date = time.strftime('%Y-%m-%d %H:%M:%S') else: try: time.strptime(modification_date, "%Y-%m-%d %H:%M:%S") except ValueError: modification_date = '1970-01-01 00:00:00' # We check if the format is already in bibFmt nb_found = find_record_format(id_bibrec, format_name) if nb_found == 1: # we are going to update the format # compress the format_value value pickled_format_value = compress(format_value) # update the format: query = """UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=%s AND format=%s""" params = (modification_date, pickled_format_value, id_bibrec, format_name) try: if not pretend: row_id = run_sql(query, params) if not pretend and row_id is None: write_message(" Failed: Error during update_bibfmt_format function", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Update the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 except Error, error: write_message(" Error during the update_bibfmt_format function : %s " % error, verbose=1, stream=sys.stderr) elif nb_found > 1: write_message(" Failed: Same format %s found several time in bibfmt for the same record." % format_name, verbose=1, stream=sys.stderr) return 1 else: # Insert the format information in BibFMT res = insert_bibfmt(id_bibrec, format_value, format_name, modification_date, pretend=pretend) if res is None: write_message(" Failed: Error during insert_bibfmt", verbose=1, stream=sys.stderr) return 1 else: write_message(" -Insert the format %s in bibfmt : DONE" % format_name , verbose=2) return 0 def delete_bibfmt_format(id_bibrec, format_name, pretend=False): """ Delete format FORMAT_NAME from bibfmt table for record ID_BIBREC. 
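    For instance (illustrative format code), delete_bibfmt_format(42, 'hb')
    would drop the cached HTML-brief ('hb') format of record 42, as is done
    for the formats listed in CFG_BIBUPLOAD_DELETE_FORMATS upon record
    change.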
""" if not pretend: run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s and format=%s", (id_bibrec, format_name)) return 0 def archive_marcxml_for_history(recID, pretend=False): """ Archive current MARCXML format of record RECID from BIBFMT table into hstRECORD table. Useful to keep MARCXML history of records. Return 0 if everything went fine. Return 1 otherwise. """ try: res = run_sql("SELECT id_bibrec, value, last_updated FROM bibfmt WHERE format='xm' AND id_bibrec=%s", (recID,)) if res and not pretend: run_sql("""INSERT INTO hstRECORD (id_bibrec, marcxml, job_id, job_name, job_person, job_date, job_details) VALUES (%s,%s,%s,%s,%s,%s,%s)""", (res[0][0], res[0][1], task_get_task_param('task_id', 0), 'bibupload', task_get_task_param('user','UNKNOWN'), res[0][2], 'mode: ' + task_get_option('mode','UNKNOWN') + '; file: ' + task_get_option('file_path','UNKNOWN') + '.')) except Error, error: write_message(" Error during archive_marcxml_for_history: %s " % error, verbose=1, stream=sys.stderr) return 1 return 0 def update_database_with_metadata(record, rec_id, oai_rec_id = "oai", pretend=False): """Update the database tables with the record and the record id given in parameter""" for tag in record.keys(): # check if tag is not a special one: if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each tag there is a list of tuples representing datafields tuple_list = record[tag] # this list should contain the elements of a full tag [tag, ind1, ind2, subfield_code] tag_list = [] tag_list.append(tag) for single_tuple in tuple_list: # these are the contents of a single tuple subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # append the ind's to the full tag if ind1 == '' or ind1 == ' ': tag_list.append('_') else: tag_list.append(ind1) if ind2 == '' or ind2 == ' ': tag_list.append('_') else: tag_list.append(ind2) datafield_number = single_tuple[4] if tag in CFG_BIBUPLOAD_SPECIAL_TAGS: # nothing to do for special tags (FFT, FMT) pass elif tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS and tag != "001": value = single_tuple[3] # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend) #print 'tname, bibrow', table_name, bibxxx_row_id; if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend) if res is None: write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) else: # get the tag and value from the content of each subfield for subfield in subfield_list: subtag = subfield[0] value = subfield[1] tag_list.append(subtag) # get the full tag full_tag = ''.join(tag_list) # update the tables write_message(" insertion of the tag "+full_tag+" with the value "+value, verbose=9) # insert the tag and value into into bibxxx (table_name, bibxxx_row_id) = insert_record_bibxxx(full_tag, value, pretend=pretend) if table_name is None or bibxxx_row_id is None: write_message(" Failed : during insert_record_bibxxx", verbose=1, stream=sys.stderr) # connect bibxxx and bibrec with the table bibrec_bibxxx res = insert_record_bibrec_bibxxx(table_name, bibxxx_row_id, datafield_number, rec_id, pretend=pretend) if res is None: 
write_message(" Failed : during insert_record_bibrec_bibxxx", verbose=1, stream=sys.stderr) # remove the subtag from the list tag_list.pop() tag_list.pop() tag_list.pop() tag_list.pop() write_message(" -Update the database with metadata : DONE", verbose=2) log_record_uploading(oai_rec_id, task_get_task_param('task_id', 0), rec_id, 'P', pretend=pretend) def append_new_tag_to_old_record(record, rec_old, opt_tag, opt_mode): """Append new tags to a old record""" def _append_tag(tag): # Reference mode append only reference tag if opt_mode == 'reference': if tag == CFG_BIBUPLOAD_REFERENCE_TAG: for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] # We add the datafield to the old record write_message(" Adding tag: %s ind1=%s ind2=%s code=%s" % (tag, ind1, ind2, subfield_list), verbose=9) newfield_number = record_add_field(rec_old, tag, ind1, ind2, subfields=subfield_list) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: if tag in CFG_BIBUPLOAD_CONTROLFIELD_TAGS: if tag == '001': pass else: # if it is a controlfield,just access the value for single_tuple in record[tag]: controlfield_value = single_tuple[3] # add the field to the old record newfield_number = record_add_field(rec_old, tag, controlfield_value=controlfield_value) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) else: # For each tag there is a list of tuples representing datafields for single_tuple in record[tag]: # We retrieve the information of the tag subfield_list = single_tuple[0] ind1 = single_tuple[1] ind2 = single_tuple[2] if '%s%s%s' % (tag, ind1 == ' ' and '_' or ind1, ind2 == ' ' and '_' or ind2) in (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[:5], CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[:5]): ## We don't want to append the external identifier ## if it is already existing. if record_find_field(rec_old, tag, single_tuple)[0] is not None: write_message(" Not adding tag: %s ind1=%s ind2=%s subfields=%s: it's already there" % (tag, ind1, ind2, subfield_list), verbose=9) continue # We add the datafield to the old record write_message(" Adding tag: %s ind1=%s ind2=%s subfields=%s" % (tag, ind1, ind2, subfield_list), verbose=9) newfield_number = record_add_field(rec_old, tag, ind1, ind2, subfields=subfield_list) if newfield_number is None: write_message(" Error when adding the field"+tag, verbose=1, stream=sys.stderr) if opt_tag is not None: _append_tag(opt_tag) else: # Go through each tag in the appended record for tag in record: _append_tag(tag) return rec_old def copy_strong_tags_from_old_record(record, rec_old): """ Look for strong tags in RECORD and REC_OLD. If no strong tags are found in RECORD, then copy them over from REC_OLD. This function modifies RECORD structure on the spot. """ for strong_tag in CFG_BIBUPLOAD_STRONG_TAGS: if not record_get_field_instances(record, strong_tag): strong_tag_old_field_instances = record_get_field_instances(rec_old, strong_tag) if strong_tag_old_field_instances: for strong_tag_old_field_instance in strong_tag_old_field_instances: sf_vals, fi_ind1, fi_ind2, controlfield, dummy = strong_tag_old_field_instance record_add_field(record, strong_tag, fi_ind1, fi_ind2, controlfield, sf_vals) return ### Delete functions def delete_tags(record, rec_old): """ Returns a record structure with all the fields in rec_old minus the fields in record. @param record: The record containing tags to delete. 
@type record: record structure @param rec_old: The original record. @type rec_old: record structure @return: The modified record. @rtype: record structure """ returned_record = copy.deepcopy(rec_old) for tag, fields in record.iteritems(): if tag in ('001', ): continue for field in fields: local_position = record_find_field(returned_record, tag, field)[1] if local_position is not None: record_delete_field(returned_record, tag, field_position_local=local_position) return returned_record def delete_tags_to_correct(record, rec_old, opt_tag): """ Delete tags from REC_OLD which are also existing in RECORD. When deleting, pay attention not only to tags, but also to indicators, so that fields with the same tags but different indicators are not deleted. """ ## Some fields are controlled via provenance information. ## We should re-add saved fields at the end. fields_to_readd = {} for tag in CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS: if tag[:3] in record: tmp_field_instances = record_get_field_instances(record, tag[:3], tag[3], tag[4]) ## Let's discover the provenance that will be updated provenances_to_update = [] for instance in tmp_field_instances: for code, value in instance[0]: if code == tag[5]: if value not in provenances_to_update: provenances_to_update.append(value) break else: ## The provenance is not specified. ## let's add the special empty provenance. if '' not in provenances_to_update: provenances_to_update.append('') potential_fields_to_readd = record_get_field_instances(rec_old, tag[:3], tag[3], tag[4]) ## Let's take all the field corresponding to tag ## Let's save apart all the fields that should be updated, but ## since they have a different provenance not mentioned in record ## they should be preserved. fields = [] for sf_vals, ind1, ind2, dummy_cf, dummy_line in potential_fields_to_readd: for code, value in sf_vals: if code == tag[5]: if value not in provenances_to_update: fields.append(sf_vals) break else: if '' not in provenances_to_update: ## Empty provenance, let's protect in any case fields.append(sf_vals) fields_to_readd[tag] = fields # browse through all the tags from the MARCXML file: for tag in record: # do we have to delete only a special tag or any tag? if opt_tag is None or opt_tag == tag: # check if the tag exists in the old record too: if tag in rec_old and tag != '001': # the tag does exist, so delete all record's tag+ind1+ind2 combinations from rec_old for dummy_sf_vals, ind1, ind2, dummy_cf, field_number in record[tag]: write_message(" Delete tag: " + tag + " ind1=" + ind1 + " ind2=" + ind2, verbose=9) record_delete_field(rec_old, tag, ind1, ind2) ## Ok, we readd necessary fields! 
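# Editor's note -- illustrative sketch, not part of the patch.  The
# provenance machinery above operates on 6-character tag specifications:
# tag[:3] is the MARC tag, tag[3] and tag[4] the indicators ('_' meaning
# blank) and tag[5] the subfield code carrying the provenance.  The
# concrete specs come from CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS;
# '6531_9' below is only an assumed example value:
#
#     tag spec '6531_9'  ->  tag '653', ind1 '1', ind2 blank, code '9'
#
#     incoming record carries only keywords with $9 'HEP', so:
#     old 6531_ field with $9 'HEP'      -> deleted and replaced
#     old 6531_ field with $9 'INSPIRE'  -> saved in fields_to_readd
#     old 6531_ field without any $9     -> treated as provenance ''
#
# (the loop below re-adds the fields whose provenance was preserved)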
for tag, fields in fields_to_readd.iteritems(): for sf_vals in fields: write_message(" Adding tag: " + tag[:3] + " ind1=" + tag[3] + " ind2=" + tag[4] + " code=" + str(sf_vals), verbose=9) record_add_field(rec_old, tag[:3], tag[3], tag[4], subfields=sf_vals) def delete_bibrec_bibxxx(record, id_bibrec, pretend=False): """Delete the database record from the table bibxxx given in parameters""" # we clear all the rows from bibrec_bibxxx from the old record for tag in record.keys(): if tag not in CFG_BIBUPLOAD_SPECIAL_TAGS: # for each name construct the bibrec_bibxxx table name table_name = 'bibrec_bib'+tag[0:2]+'x' # delete all the records with proper id_bibrec query = """DELETE FROM `%s` where id_bibrec = %s""" params = (table_name, id_bibrec) if not pretend: try: run_sql(query % params) except Error, error: write_message(" Error during the delete_bibrec_bibxxx function : %s " % error, verbose=1, stream=sys.stderr) def main(): """Main that construct all the bibtask.""" task_init(authorization_action='runbibupload', authorization_msg="BibUpload Task Submission", description="""Receive MARC XML file and update appropriate database tables according to options. Examples: $ bibupload -i input.xml """, help_specific_usage=""" -a, --append\t\tnew fields are appended to the existing record -c, --correct\t\tfields are replaced by the new ones in the existing record, except \t\t\twhen overridden by CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS -f, --format\t\ttakes only the FMT fields into account. Does not update -i, --insert\t\tinsert the new record in the database -r, --replace\t\tthe existing record is entirely replaced by the new one, \t\t\texcept for fields in CFG_BIBUPLOAD_STRONG_TAGS -z, --reference\tupdate references (update only 999 fields) -d, --delete\t\tspecified fields are deleted in existing record -S, --stage=STAGE\tstage to start from in the algorithm (0: always done; 1: FMT tags; \t\t\t2: FFT tags; 3: BibFmt; 4: Metadata update; 5: time update) -n, --notimechange\tdo not change record last modification date when updating -o, --holdingpen\tInsert record into holding pen instead of the normal database --pretend\t\tdo not really insert/append/correct/replace the input file --force\t\twhen --replace, use provided 001 tag values, even if the matching \t\t\trecord does not exist (thus allocating it on-the-fly) --callback-url\tSend via a POST request a JSON-serialized answer (see admin guide), in \t\t\torder to provide a feedback to an external service about the outcome of the operation. + --nonce\t\twhen used together with --callback add the nonce value in the JSON message. """, version=__revision__, specific_params=("ircazdS:fno", [ "insert", "replace", "correct", "append", "reference", "delete", "stage=", "format", "notimechange", "holdingpen", "pretend", "force", - "callback-url=" + "callback-url=", + "nonce=" ]), task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, task_run_fnc=task_run_core) def task_submit_elaborate_specific_parameter(key, value, opts, args): """ Given the string key it checks it's meaning, eventually using the value. Usually it fills some key in the options dict. It must return True if it has elaborated the key, False, if it doesn't know that key. 
eg: if key in ['-n', '--number']: task_get_option(\1) = value return True return False """ # No time change option if key in ("-n", "--notimechange"): task_set_option('notimechange', 1) # Insert mode option elif key in ("-i", "--insert"): if task_get_option('mode') == 'replace': # if also replace found, then set to replace_or_insert task_set_option('mode', 'replace_or_insert') else: task_set_option('mode', 'insert') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Replace mode option elif key in ("-r", "--replace"): if task_get_option('mode') == 'insert': # if also insert found, then set to replace_or_insert task_set_option('mode', 'replace_or_insert') else: task_set_option('mode', 'replace') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Holding pen mode option elif key in ("-o", "--holdingpen"): write_message("Holding pen mode", verbose=3) task_set_option('mode', 'holdingpen') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Correct mode option elif key in ("-c", "--correct"): task_set_option('mode', 'correct') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Append mode option elif key in ("-a", "--append"): task_set_option('mode', 'append') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Reference mode option elif key in ("-z", "--reference"): task_set_option('mode', 'reference') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("-d", "--delete"): task_set_option('mode', 'delete') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Format mode option elif key in ("-f", "--format"): task_set_option('mode', 'format') fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("--pretend",): task_set_option('pretend', True) fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) elif key in ("--force",): task_set_option('force', True) fix_argv_paths([args[0]]) task_set_option('file_path', os.path.abspath(args[0])) # Stage elif key in ("-S", "--stage"): try: value = int(value) except ValueError: print >> sys.stderr, """The value specified for --stage must be a valid integer, not %s""" % value return False if not (0 <= value <= 5): print >> sys.stderr, """The value specified for --stage must be comprised between 0 and 5""" return False task_set_option('stage_to_start_from', value) elif key in ("--callback-url", ): task_set_option('callback_url', value) + elif key in ("--nonce", ): + task_set_option('nonce', value) else: return False return True def task_submit_check_options(): """ Reimplement this method for having the possibility to check options before submitting the task, in order for example to provide default values. It must return False if there are errors in the options. """ if task_get_option('mode') is None: write_message("Please specify at least one update/insert mode!") return False if task_get_option('file_path') is None: write_message("Missing filename! 
-h for help.") return False return True def writing_rights_p(): """Return True in case bibupload has the proper rights to write in the fulltext file folder.""" global _WRITING_RIGHTS if _WRITING_RIGHTS is not None: return _WRITING_RIGHTS try: if not os.path.exists(CFG_WEBSUBMIT_FILEDIR): os.makedirs(CFG_WEBSUBMIT_FILEDIR) fd, filename = tempfile.mkstemp(suffix='.txt', prefix='test', dir=CFG_WEBSUBMIT_FILEDIR) test = os.fdopen(fd, 'w') test.write('TEST') test.close() if open(filename).read() != 'TEST': raise IOError("Can not successfully write and readback %s" % filename) os.remove(filename) except: register_exception(alert_admin=True) return False return True def post_results_to_callback_url(results, callback_url): if not CFG_JSON_AVAILABLE: from warnings import warn warn("--callback-url used but simplejson/json not available") return json_results = json.dumps(results) ## :///?# scheme, netloc, path, query, fragment = urlparse.urlsplit(callback_url) ## See: http://stackoverflow.com/questions/111945/is-there-any-way-to-do-http-put-in-python if scheme == 'http': opener = urllib2.build_opener(urllib2.HTTPHandler) elif scheme == 'https': opener = urllib2.build_opener(urllib2.HTTPSHandler) else: raise ValueError("Scheme not handled %s for callback_url %s" % (scheme, callback_url)) request = urllib2.Request(callback_url, data=json_results) request.add_header('Content-Type', 'application/json') + request.add_header('User-Agent', make_user_agent_string('BibUpload')) request.get_method = lambda: 'POST' return opener.open(request) def task_run_core(): """ Reimplement to add the body of the task.""" error = 0 write_message("Input file '%s', input mode '%s'." % (task_get_option('file_path'), task_get_option('mode'))) write_message("STAGE 0:", verbose=2) if task_get_option('file_path') is not None: write_message("start preocessing", verbose=3) task_update_progress("Reading XML input") recs = xml_marc_to_records(open_marc_file(task_get_option('file_path'))) stat['nb_records_to_upload'] = len(recs) write_message(" -Open XML marc: DONE", verbose=2) task_sleep_now_if_required(can_stop_too=True) write_message("Entering records loop", verbose=3) callback_url = task_get_option('callback_url') results_for_callback = {'results': []} if recs is not None: # We proceed each record by record for record in recs: record_id = record_extract_oai_id(record) task_sleep_now_if_required(can_stop_too=True) if task_get_option("mode") == "holdingpen": #inserting into the holding pen write_message("Inserting into holding pen", verbose=3) insert_record_into_holding_pen(record, record_id) else: write_message("Inserting into main database", verbose=3) error = bibupload( record, opt_tag=task_get_option('tag'), opt_mode=task_get_option('mode'), opt_stage_to_start_from=task_get_option('stage_to_start_from'), opt_notimechange=task_get_option('notimechange'), oai_rec_id=record_id, pretend=task_get_option('pretend')) if error[0] == 1: if record: write_message(record_xml_output(record), stream=sys.stderr) else: write_message("Record could not have been parsed", stream=sys.stderr) stat['nb_errors'] += 1 if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) elif error[0] == 2: if record: write_message(record_xml_output(record), stream=sys.stderr) else: write_message("Record could not have been parsed", stream=sys.stderr) if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) elif error[0] == 0: if 
callback_url: from invenio.search_engine import print_record results_for_callback['results'].append({'recid': error[1], 'success': True, "marcxml": print_record(error[1], 'xm'), 'url': "%s/%s/%s" % (CFG_SITE_URL, CFG_SITE_RECORD, error[1])}) else: if callback_url: results_for_callback['results'].append({'recid': error[1], 'success': False, 'error_message': error[2]}) task_update_progress("Done %d out of %d." % \ (stat['nb_records_inserted'] + \ stat['nb_records_updated'], stat['nb_records_to_upload'])) else: write_message(" Error bibupload failed: No record found", verbose=1, stream=sys.stderr) callback_url = task_get_option("callback_url") if callback_url: + nonce = task_get_option("nonce") + if nonce: + results_for_callback["nonce"] = nonce post_results_to_callback_url(results_for_callback, callback_url) if task_get_task_param('verbose') >= 1: # Print out the statistics print_out_bibupload_statistics() # Check if they were errors return not stat['nb_errors'] >= 1 def log_record_uploading(oai_rec_id, task_id, bibrec_id, insertion_db, pretend=False): if oai_rec_id != "" and oai_rec_id != None: query = """UPDATE oaiHARVESTLOG SET date_inserted=NOW(), inserted_to_db=%s, id_bibrec=%s WHERE oai_id = %s AND bibupload_task_id = %s ORDER BY date_harvested LIMIT 1""" try: if not pretend: run_sql(query, (str(insertion_db), str(bibrec_id), str(oai_rec_id), str(task_id), )) except Error, error: write_message(" Error during the log_record_uploading function : %s " % error, verbose=1, stream=sys.stderr) if __name__ == "__main__": main() diff --git a/modules/bibupload/lib/bibupload_regression_tests.py b/modules/bibupload/lib/bibupload_regression_tests.py index 7b4e83799..00cd9a37d 100644 --- a/modules/bibupload/lib/bibupload_regression_tests.py +++ b/modules/bibupload/lib/bibupload_regression_tests.py @@ -1,4342 +1,4344 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
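# Editor's note -- illustrative sketch, not part of the patch.  Several
# tests below exercise the --callback-url machinery (now extended with
# --nonce): once the upload finishes, bibupload POSTs a JSON body of the
# following shape, built by task_run_core() above.  All concrete values
# here are made up for illustration:
#
#     {"nonce": "1234",            # present only when --nonce was given
#      "results": [{"recid": 123,
#                   "success": true,
#                   "marcxml": "<record>...</record>",
#                   "url": "http://your.site.com/record/123"},
#                  {"recid": -1,
#                   "success": false,
#                   "error_message": "..."}]}
#
# The "url" value is assembled from CFG_SITE_URL and CFG_SITE_RECORD.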
# pylint: disable=C0301

"""Regression tests for the BibUpload."""

__revision__ = "$Id$"

import re
import unittest
import datetime
import os
import time
import sys
from urllib import urlencode
from urllib2 import urlopen
import pprint
if sys.hexversion < 0x2060000:
    from md5 import md5
else:
    from hashlib import md5

from invenio.config import CFG_OAI_ID_FIELD, CFG_PREFIX, CFG_SITE_URL, CFG_TMPDIR, \
     CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG, \
     CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG, \
     CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG, \
     CFG_WEBDIR, \
     CFG_BINDIR, \
     CFG_SITE_RECORD, \
     CFG_DEVEL_SITE
from invenio.access_control_config import CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS
from invenio import bibupload
from invenio.search_engine import print_record, get_record
from invenio.jsonutils import json
from invenio.dbquery import run_sql, get_table_status_info
from invenio.dateutils import convert_datestruct_to_datetext
from invenio.testutils import make_test_suite, run_test_suite, test_web_page_content
from invenio.bibdocfile import BibRecDocs
from invenio.bibtask import task_set_task_param, setup_loggers, task_set_option, task_low_level_submission
from invenio.bibrecord import record_has_field, record_get_field_value
+from invenio.shellutils import run_shell_command
+

# helper functions:

def remove_tag_001_from_xmbuffer(xmbuffer):
    """Remove tag 001 from MARCXML buffer.  Useful for testing two
       MARCXML buffers without paying attention to recIDs attributed
       during the bibupload.
    """
    return re.sub(r'<controlfield tag="001">.*</controlfield>', '', xmbuffer)

def compare_xmbuffers(xmbuffer1, xmbuffer2):
    """Compare two XM (XML MARC) buffers by removing whitespaces and
       version numbers in tags 005 before testing.
    """

    def remove_blanks_from_xmbuffer(xmbuffer):
        """Remove \n and blanks from XMBUFFER."""
        out = xmbuffer.replace("\n", "")
        out = out.replace(" ", "")
        return out

    # remove 005 revision numbers:
    xmbuffer1 = re.sub(r'<controlfield tag="005">.*?</controlfield>', '', xmbuffer1)
    xmbuffer2 = re.sub(r'<controlfield tag="005">.*?</controlfield>', '', xmbuffer2)

    # remove whitespace:
    xmbuffer1 = remove_blanks_from_xmbuffer(xmbuffer1)
    xmbuffer2 = remove_blanks_from_xmbuffer(xmbuffer2)

    if xmbuffer1 != xmbuffer2:
        return "\n=" + xmbuffer1 + "=\n" + '!=' + "\n=" + xmbuffer2 + "=\n"

    return ''

def remove_tag_001_from_hmbuffer(hmbuffer):
    """Remove tag 001 from HTML MARC buffer.  Useful for testing two
       HTML MARC buffers without paying attention to recIDs attributed
       during the bibupload.
    """
    return re.sub(r'(^|\n)(<pre>)?[0-9]{9}\s001__\s\d+($|\n)', '', hmbuffer)
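# Editor's note -- illustrative sketch, not part of the patch: a minimal
# demonstration of how the helpers above and below combine.  The HM
# buffer is a made-up two-line example ('000000123' being the 9-digit
# recid prefix that the comparison helpers strip away).
def _editor_demo_hm_helpers():
    hm = '000000123 001__ 123\n000000123 100__ $$aDoe, John'
    without_001 = remove_tag_001_from_hmbuffer(hm)   # drops the 001 line
    # compare_hmbuffers() returns '' when the two buffers agree:
    assert compare_hmbuffers(without_001, '100__ $$aDoe, John') == ''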
 
 def compare_hmbuffers(hmbuffer1, hmbuffer2):
     """Compare two HM (HTML MARC) buffers by removing whitespaces
        before testing.
     """
 
     hmbuffer1 = hmbuffer1.strip()
     hmbuffer2 = hmbuffer2.strip()
 
    # remove eventual <pre>...</pre> formatting:
    hmbuffer1 = re.sub(r'^<pre>', '', hmbuffer1)
    hmbuffer2 = re.sub(r'^<pre>', '', hmbuffer2)
    hmbuffer1 = re.sub(r'</pre>$', '', hmbuffer1)
    hmbuffer2 = re.sub(r'</pre>
$', '', hmbuffer2) # remove 005 revision numbers: hmbuffer1 = re.sub(r'(^|\n)[0-9]{9}\s005.*($|\n)', '\n', hmbuffer1) hmbuffer2 = re.sub(r'(^|\n)[0-9]{9}\s005.*($|\n)', '\n', hmbuffer2) hmbuffer1 = hmbuffer1.strip() hmbuffer2 = hmbuffer2.strip() # remove leading recid, leaving only field values: hmbuffer1 = re.sub(r'(^|\n)[0-9]{9}\s', '', hmbuffer1) hmbuffer2 = re.sub(r'(^|\n)[0-9]{9}\s', '', hmbuffer2) # remove leading whitespace: hmbuffer1 = re.sub(r'(^|\n)\s+', '', hmbuffer1) hmbuffer2 = re.sub(r'(^|\n)\s+', '', hmbuffer2) compare_hmbuffers = hmbuffer1 == hmbuffer2 if not compare_hmbuffers: return "\n=" + hmbuffer1 + "=\n" + '!=' + "\n=" + hmbuffer2 + "=\n" return '' def wipe_out_record_from_all_tables(recid): """ Wipe out completely the record and all its traces of RECID from the database (bibrec, bibrec_bibxxx, bibxxx, bibfmt). Useful for the time being for test cases. """ # delete all the linked bibdocs for bibdoc in BibRecDocs(recid).list_bibdocs(): bibdoc.expunge() # delete from bibrec: run_sql("DELETE FROM bibrec WHERE id=%s", (recid,)) # delete from bibrec_bibxxx: for i in range(0, 10): for j in range(0, 10): run_sql("DELETE FROM %(bibrec_bibxxx)s WHERE id_bibrec=%%s" % \ {'bibrec_bibxxx': "bibrec_bib%i%ix" % (i, j)}, (recid,)) # delete all unused bibxxx values: for i in range(0, 10): for j in range(0, 10): run_sql("DELETE %(bibxxx)s FROM %(bibxxx)s " \ " LEFT JOIN %(bibrec_bibxxx)s " \ " ON %(bibxxx)s.id=%(bibrec_bibxxx)s.id_bibxxx " \ " WHERE %(bibrec_bibxxx)s.id_bibrec IS NULL" % \ {'bibxxx': "bib%i%ix" % (i, j), 'bibrec_bibxxx': "bibrec_bib%i%ix" % (i, j)}) # delete from bibfmt: run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s", (recid,)) # delete from bibrec_bibdoc: run_sql("DELETE FROM bibrec_bibdoc WHERE id_bibrec=%s", (recid,)) def try_url_download(url): """Try to download a given URL""" try: open_url = urlopen(url) open_url.read() except Exception, e: raise StandardError("Downloading %s is impossible because of %s" % (url, str(e))) return True def force_webcoll(recid): from invenio import bibindex_engine reload(bibindex_engine) from invenio import websearch_webcoll reload(websearch_webcoll) index_id, index_name, index_tags = bibindex_engine.get_word_tables("collection")[0] bibindex_engine.WordTable(index_name, index_id, index_tags, "idxWORD%02dF", default_get_words_fnc=bibindex_engine.get_words_from_phrase, tag_to_words_fnc_map={'8564_u': bibindex_engine.get_words_from_fulltext}).add_recIDs([[recid, recid]], 1) c = websearch_webcoll.Collection() c.calculate_reclist() c.update_reclist() class GenericBibUploadTest(unittest.TestCase): """Generic BibUpload testing class with predefined setUp and tearDown methods. 
""" def setUp(self): self.verbose = 0 setup_loggers() task_set_task_param('verbose', self.verbose) self.last_recid = run_sql("SELECT MAX(id) FROM bibrec")[0][0] def tearDown(self): for recid in run_sql("SELECT id FROM bibrec WHERE id>%s", (self.last_recid,)): wipe_out_record_from_all_tables(recid[0]) class BibUploadCallbackURLTest(GenericBibUploadTest): """Testing usage of CLI callback_url""" def setUp(self): GenericBibUploadTest.setUp(self) self.test = """ something Tester, J Y MIT Tester, K J CERN2 Tester, G CERN3 test11 test31 test12 test32 test13 test33 test21 test41 test22 test42 test14 test51 test52 Tester, T CERN """ self.testfile_path = os.path.join(CFG_TMPDIR, 'bibupload_regression_test_input.xml') open(self.testfile_path, "w").write(self.test) self.resultfile_path = os.path.join(CFG_TMPDIR, 'bibupload_regression_test_result.json') if CFG_DEVEL_SITE: def test_simple_insert_callback_url(self): """bibupload - --callback-url with simple insert""" taskid = task_low_level_submission('bibupload', 'test', '-i', self.testfile_path, '--callback-url', CFG_SITE_URL + '/httptest/post2?%s' % urlencode({"save": self.resultfile_path}), '-v0') - os.system(CFG_BINDIR + '/bibupload' + " %s" % taskid) + run_shell_command(CFG_BINDIR + '/bibupload %s', [taskid]) results = json.loads(open(self.resultfile_path).read()) self.failUnless('results' in results) self.assertEqual(len(results['results']), 1) self.failUnless(results['results'][0]['success']) self.failUnless(results['results'][0]['recid'] > 0) self.failUnless("""Tester, J Y""" in results['results'][0]['marcxml'], results['results'][0]['marcxml']) class BibUploadInsertModeTest(GenericBibUploadTest): """Testing insert mode.""" def setUp(self): # pylint: disable=C0103 """Initialise the MARCXML variable""" GenericBibUploadTest.setUp(self) self.test = """ something Tester, J Y MIT Tester, K J CERN2 Tester, G CERN3 test11 test31 test12 test32 test13 test33 test21 test41 test22 test42 test14 test51 test52 Tester, T CERN """ self.test_hm = """ 100__ $$aTester, T$$uCERN 111__ $$atest11$$ctest31 111__ $$atest12$$ctest32 111__ $$atest13$$ctest33 111__ $$btest21$$dtest41 111__ $$btest22$$dtest42 111__ $$atest14 111__ $$etest51 111__ $$etest52 245__ $$asomething 700__ $$aTester, J Y$$uMIT 700__ $$aTester, K J$$uCERN2 700__ $$aTester, G$$uCERN3 """ def test_create_record_id(self): """bibupload - insert mode, trying to create a new record ID in the database""" rec_id = bibupload.create_new_record() self.assertNotEqual(None, rec_id) def test_create_specific_record_id(self): """bibupload - insert mode, trying to create a new specifc record ID in the database""" expected_rec_id = run_sql("SELECT MAX(id) FROM bibrec")[0][0] + 1 rec_id = bibupload.create_new_record(expected_rec_id) self.assertEqual(rec_id, expected_rec_id) def test_no_retrieve_record_id(self): """bibupload - insert mode, detection of record ID in the input file""" # We create create the record out of the xml marc recs = bibupload.xml_marc_to_records(self.test) # We call the function which should retrieve the record id rec_id = bibupload.retrieve_rec_id(recs[0], 'insert') # We compare the value found with None self.assertEqual(None, rec_id) def test_insert_complete_xmlmarc(self): """bibupload - insert mode, trying to insert complete MARCXML file""" # Initialize the global variable # We create create the record out of the xml marc recs = bibupload.xml_marc_to_records(self.test) # We call the main function with the record as a parameter err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') 
# We retrieve the inserted xml inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') # Compare if the two MARCXML are the same self.assertEqual(compare_xmbuffers(remove_tag_001_from_xmbuffer(inserted_xm), self.test), '') self.assertEqual(compare_hmbuffers(remove_tag_001_from_hmbuffer(inserted_hm), self.test_hm), '') def test_retrieve_005_tag(self): """bibupload - insert mode, verifying insertion of 005 control field for record """ # Convert marc xml into record structure recs = bibupload.xml_marc_to_records(self.test) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # Retrive the inserted record based on the record id rec = get_record(recid) # We retrieve the creationdate date from the database query = """SELECT DATE_FORMAT(creation_date,'%%Y%%m%%d%%H%%i%%s') FROM bibrec where id = %s""" res = run_sql(query % recid) self.assertEqual(record_has_field(rec,'005'),True) self.assertEqual(str(res[0][0])+'.0',record_get_field_value(rec,'005','','')) class BibUploadAppendModeTest(GenericBibUploadTest): """Testing append mode.""" def setUp(self): # pylint: disable=C0103 """Initialize the MARCXML variable""" GenericBibUploadTest.setUp(self) self.test_existing = """ 123456789 Tester, T DESY 0003719PHOPHO """ self.test_to_append = """ 123456789 Tester, U CERN 0003719PHOPHO """ self.test_expected_xm = """ 123456789 Tester, T DESY Tester, U CERN 0003719PHOPHO """ self.test_expected_hm = """ 001__ 123456789 100__ $$aTester, T$$uDESY 100__ $$aTester, U$$uCERN 970__ $$a0003719PHOPHO """ # insert test record: test_to_upload = self.test_existing.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') self.test_recid = recid # replace test buffers with real recid of inserted test record: self.test_existing = self.test_existing.replace('123456789', str(self.test_recid)) self.test_to_append = self.test_to_append.replace('123456789', str(self.test_recid)) self.test_expected_xm = self.test_expected_xm.replace('123456789', str(self.test_recid)) self.test_expected_hm = self.test_expected_hm.replace('123456789', str(self.test_recid)) def test_retrieve_record_id(self): """bibupload - append mode, the input file should contain a record ID""" # We create create the record out of the xml marc recs = bibupload.xml_marc_to_records(self.test_to_append) # We call the function which should retrieve the record id rec_id = bibupload.retrieve_rec_id(recs[0], 'append') # We compare the value found with None self.assertEqual(self.test_recid, rec_id) # clean up after ourselves: def test_update_modification_record_date(self): """bibupload - append mode, checking the update of the modification date""" # Initialize the global variable # We create create the record out of the xml marc recs = bibupload.xml_marc_to_records(self.test_existing) # We call the function which should retrieve the record id rec_id = bibupload.retrieve_rec_id(recs[0], opt_mode='append') # Retrieve current localtime now = time.localtime() # We update the modification date bibupload.update_bibrec_modif_date(convert_datestruct_to_datetext(now), rec_id) # We retrieve the modification date from the database query = """SELECT DATE_FORMAT(modification_date,'%%Y-%%m-%%d %%H:%%i:%%s') FROM bibrec where id = %s""" res = run_sql(query % rec_id) # We compare the two results self.assertEqual(res[0][0], convert_datestruct_to_datetext(now)) # clean up after ourselves: def test_append_complete_xml_marc(self): """bibupload - append mode, appending 
complete MARCXML file""" # Now we append a datafield # We create create the record out of the xml marc recs = bibupload.xml_marc_to_records(self.test_to_append) # We call the main function with the record as a parameter err, recid, msg = bibupload.bibupload(recs[0], opt_mode='append') # We retrieve the inserted xm after_append_xm = print_record(recid, 'xm') after_append_hm = print_record(recid, 'hm') # Compare if the two MARCXML are the same self.assertEqual(compare_xmbuffers(after_append_xm, self.test_expected_xm), '') self.assertEqual(compare_hmbuffers(after_append_hm, self.test_expected_hm), '') # clean up after ourselves: def test_retrieve_updated_005_tag(self): """bibupload - append mode, updating 005 control tag after modifiction """ recs = bibupload.xml_marc_to_records(self.test_to_append) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='append') rec = get_record(recid) query = """SELECT DATE_FORMAT(modification_date,'%%Y%%m%%d%%H%%i%%s') FROM bibrec where id = %s""" res = run_sql(query % recid) self.assertEqual(str(res[0][0])+'.0',record_get_field_value(rec,'005','','')) class BibUploadCorrectModeTest(GenericBibUploadTest): """ Testing correcting a record containing similar tags (identical tag, different indicators). Currently Invenio replaces only those tags that have matching indicators too, unlike ALEPH500 that does not pay attention to indicators, it corrects all fields with the same tag, regardless of the indicator values. """ def setUp(self): """Initialize the MARCXML test record.""" GenericBibUploadTest.setUp(self) self.testrec1_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Test, John Test University Cool Test, Jim Test Laboratory """ self.testrec1_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 10047 $$aTest, John$$uTest University 10048 $$aCool 10047 $$aTest, Jim$$uTest Laboratory """ self.testrec1_xm_to_correct = """ 123456789 Test, Joseph Test Academy Test2, Joseph Test2 Academy """ self.testrec1_corrected_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Cool Test, Joseph Test Academy Test2, Joseph Test2 Academy """ self.testrec1_corrected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 10048 $$aCool 10047 $$aTest, Joseph$$uTest Academy 10047 $$aTest2, Joseph$$uTest2 Academy """ # insert test record: test_record_xm = self.testrec1_xm.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_record_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recID: self.testrec1_xm = self.testrec1_xm.replace('123456789', str(recid)) self.testrec1_hm = self.testrec1_hm.replace('123456789', str(recid)) self.testrec1_xm_to_correct = self.testrec1_xm_to_correct.replace('123456789', str(recid)) self.testrec1_corrected_xm = self.testrec1_corrected_xm.replace('123456789', str(recid)) self.testrec1_corrected_hm = self.testrec1_corrected_hm.replace('123456789', str(recid)) # test of the inserted record: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, self.testrec1_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.testrec1_hm), '') def test_record_correction(self): """bibupload - correct mode, similar MARCXML tags/indicators""" # correct some tags: recs = bibupload.xml_marc_to_records(self.testrec1_xm_to_correct) err, self.recid, msg = bibupload.bibupload(recs[0], opt_mode='correct') corrected_xm = print_record(self.recid, 'xm') corrected_hm = 
print_record(self.recid, 'hm') # did it work? self.assertEqual(compare_xmbuffers(corrected_xm, self.testrec1_corrected_xm), '') self.assertEqual(compare_hmbuffers(corrected_hm, self.testrec1_corrected_hm), '') # clean up after ourselves: return class BibUploadDeleteModeTest(GenericBibUploadTest): """ Testing deleting specific tags from a record while keeping anything else untouched. Currently Invenio deletes only those tags that have matching indicators too, unlike ALEPH500 that does not pay attention to indicators, it corrects all fields with the same tag, regardless of the indicator values. """ def setUp(self): """Initialize the MARCXML test record.""" GenericBibUploadTest.setUp(self) self.testrec1_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Test, John Test University Cool Test, Jim Test Laboratory dumb text """ self.testrec1_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 10047 $$aTest, John$$uTest University 10048 $$aCool 10047 $$aTest, Jim$$uTest Laboratory 888__ $$adumb text """ self.testrec1_xm_to_delete = """ 123456789 Test, Jane Test Institute Test, Johnson Test University Cool dumb text """ self.testrec1_corrected_xm = """ 123456789 SzGeCERN Test, John Test University Test, Jim Test Laboratory """ self.testrec1_corrected_hm = """ 001__ 123456789 003__ SzGeCERN 10047 $$aTest, John$$uTest University 10047 $$aTest, Jim$$uTest Laboratory """ # insert test record: test_record_xm = self.testrec1_xm.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_record_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recID: self.testrec1_xm = self.testrec1_xm.replace('123456789', str(recid)) self.testrec1_hm = self.testrec1_hm.replace('123456789', str(recid)) self.testrec1_xm_to_delete = self.testrec1_xm_to_delete.replace('123456789', str(recid)) self.testrec1_corrected_xm = self.testrec1_corrected_xm.replace('123456789', str(recid)) self.testrec1_corrected_hm = self.testrec1_corrected_hm.replace('123456789', str(recid)) # test of the inserted record: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, self.testrec1_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.testrec1_hm), '') # Checking dumb text is in bibxxx self.failUnless(run_sql("SELECT id_bibrec from bibrec_bib88x WHERE id_bibrec=%s", (recid, ))) def test_record_tags_deletion(self): """bibupload - delete mode, deleting specific tags""" # correct some tags: recs = bibupload.xml_marc_to_records(self.testrec1_xm_to_delete) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='delete') corrected_xm = print_record(recid, 'xm') corrected_hm = print_record(recid, 'hm') # did it work? 
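# Editor's note -- illustrative sketch, not part of the patch.  Delete
# mode removes a field from the old record only when tag, indicators and
# subfield values all match, so with the fixtures above:
#
#     100__ $$aTest, Jane$$uTest Institute  -> exact match, deleted
#     10048 $$aCool                         -> exact match, deleted
#     888__ $$adumb text                    -> exact match, deleted
#     10047 $$aTest, Johnson$$uTest University
#         -> matches no existing field ('Johnson' != 'John'), so the
#            10047 fields for Test, John and Test, Jim both survive,
#
# which is exactly what the assertions below check.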
self.assertEqual(compare_xmbuffers(corrected_xm, self.testrec1_corrected_xm), '') self.assertEqual(compare_hmbuffers(corrected_hm, self.testrec1_corrected_hm), '') # Checking dumb text is no more in bibxxx self.failIf(run_sql("SELECT id_bibrec from bibrec_bib88x WHERE id_bibrec=%s", (recid, ))) # clean up after ourselves: class BibUploadReplaceModeTest(GenericBibUploadTest): """Testing replace mode.""" def test_record_replace(self): """bibupload - replace mode, similar MARCXML tags/indicators""" # replace some tags: testrec1_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Test, John Test University Cool Test, Jim Test Laboratory """ testrec1_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 10047 $$aTest, John$$uTest University 10048 $$aCool 10047 $$aTest, Jim$$uTest Laboratory """ testrec1_xm_to_replace = """ 123456789 Test, Joseph Test Academy Test2, Joseph Test2 Academy """ testrec1_replaced_xm = """ 123456789 Test, Joseph Test Academy Test2, Joseph Test2 Academy """ testrec1_replaced_hm = """ 001__ 123456789 10047 $$aTest, Joseph$$uTest Academy 10047 $$aTest2, Joseph$$uTest2 Academy """ # insert test record: test_record_xm = testrec1_xm.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_record_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recID: testrec1_xm = testrec1_xm.replace('123456789', str(recid)) testrec1_hm = testrec1_hm.replace('123456789', str(recid)) testrec1_xm_to_replace = testrec1_xm_to_replace.replace('123456789', str(recid)) testrec1_replaced_xm = testrec1_replaced_xm.replace('123456789', str(recid)) testrec1_replaced_hm = testrec1_replaced_hm.replace('123456789', str(recid)) # test of the inserted record: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec1_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec1_hm), '') recs = bibupload.xml_marc_to_records(testrec1_xm_to_replace) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='replace') replaced_xm = print_record(recid, 'xm') replaced_hm = print_record(recid, 'hm') # did it work? self.assertEqual(compare_xmbuffers(replaced_xm, testrec1_replaced_xm), '') self.assertEqual(compare_hmbuffers(replaced_hm, testrec1_replaced_hm), '') def test_record_replace_force_non_existing(self): """bibupload - replace mode, force non existing recid""" # replace some tags: the_recid = self.last_recid + 1 testrec1_xm = """ %s SzGeCERN Test, Jane Test Institute Test, John Test University Cool Test, Jim Test Laboratory """ % the_recid testrec1_hm = """ 001__ %s 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 10047 $$aTest, John$$uTest University 10048 $$aCool 10047 $$aTest, Jim$$uTest Laboratory """ % the_recid recs = bibupload.xml_marc_to_records(testrec1_xm) task_set_option('force', True) try: err, recid, msg = bibupload.bibupload(recs[0], opt_mode='replace') finally: task_set_option('force', False) replaced_xm = print_record(recid, 'xm') replaced_hm = print_record(recid, 'hm') # did it work? 
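# Editor's note -- illustrative sketch, not part of the patch.  The
# 'force' option exercised above corresponds to the --force CLI flag: in
# replace mode bibupload normally refuses a 001 value that matches no
# existing record (returning (1, -1, msg), as the next test checks), but
# with force it allocates the requested recid on the fly:
#
#     task_set_option('force', True)
#     try:
#         err, recid, msg = bibupload.bibupload(recs[0],
#                                               opt_mode='replace')
#     finally:
#         task_set_option('force', False)  # always restore the default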
self.assertEqual(compare_xmbuffers(replaced_xm, testrec1_xm), '') self.assertEqual(compare_hmbuffers(replaced_hm, testrec1_hm), '') self.assertEqual(recid, the_recid) def test_record_replace_non_existing(self): """bibupload - replace mode, non existing recid""" # replace some tags: the_recid = self.last_recid + 1 testrec1_xm = """ %s SzGeCERN Test, Jane Test Institute Test, John Test University Cool Test, Jim Test Laboratory """ % the_recid testrec1_hm = """ 001__ %s 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 10047 $$aTest, John$$uTest University 10048 $$aCool 10047 $$aTest, Jim$$uTest Laboratory """ % the_recid recs = bibupload.xml_marc_to_records(testrec1_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='replace') self.assertEqual((err, recid), (1, -1)) def test_record_replace_two_recids(self): """bibupload - replace mode, two recids""" # replace some tags: testrec1_xm = """ 300 305 SzGeCERN Test, Jane Test Institute Test, John Test University Cool Test, Jim Test Laboratory """ recs = bibupload.xml_marc_to_records(testrec1_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='replace') # did it work? self.assertEqual((err, recid), (1, -1)) class BibUploadReferencesModeTest(GenericBibUploadTest): """Testing references mode.""" def setUp(self): """Initialize the MARCXML variable""" GenericBibUploadTest.setUp(self) self.test_insert = """ 123456789 Tester, T CERN """ self.test_reference = """ 123456789 M. Lüscher and P. Weisz, String excitation energies in SU(N) gauge theories beyond the free-string approximation, J. High Energy Phys. 07 (2004) 014 """ self.test_reference_expected_xm = """ 123456789 Tester, T CERN M. Lüscher and P. Weisz, String excitation energies in SU(N) gauge theories beyond the free-string approximation, J. High Energy Phys. 07 (2004) 014 """ self.test_insert_hm = """ 001__ 123456789 100__ $$aTester, T$$uCERN """ self.test_reference_expected_hm = """ 001__ 123456789 100__ $$aTester, T$$uCERN %(reference_tag)sC5 $$mM. Lüscher and P. Weisz, String excitation energies in SU(N) gauge theories beyond the free-string approximation,$$sJ. High Energy Phys. 
07 (2004) 014 """ % {'reference_tag': bibupload.CFG_BIBUPLOAD_REFERENCE_TAG} # insert test record: test_insert = self.test_insert.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_insert) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recID: self.test_insert = self.test_insert.replace('123456789', str(recid)) self.test_insert_hm = self.test_insert_hm.replace('123456789', str(recid)) self.test_reference = self.test_reference.replace('123456789', str(recid)) self.test_reference_expected_xm = self.test_reference_expected_xm.replace('123456789', str(recid)) self.test_reference_expected_hm = self.test_reference_expected_hm.replace('123456789', str(recid)) # test of the inserted record: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, self.test_insert), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.test_insert_hm), '') self.test_recid = recid def test_reference_complete_xml_marc(self): """bibupload - reference mode, inserting references MARCXML file""" # We create create the record out of the xml marc recs = bibupload.xml_marc_to_records(self.test_reference) # We call the main function with the record as a parameter err, recid, msg = bibupload.bibupload(recs[0], opt_mode='reference') # We retrieve the inserted xml reference_xm = print_record(recid, 'xm') reference_hm = print_record(recid, 'hm') # Compare if the two MARCXML are the same self.assertEqual(compare_xmbuffers(reference_xm, self.test_reference_expected_xm), '') self.assertEqual(compare_hmbuffers(reference_hm, self.test_reference_expected_hm), '') class BibUploadFMTModeTest(GenericBibUploadTest): """Testing FMT mode.""" def setUp(self): """Initialize the MARCXML variable""" GenericBibUploadTest.setUp(self) self.new_xm_with_fmt = """ SzGeCERN HB Test. Okay. 2008-03-14 15:14:00 Bar, Baz Foo On the quux and huux """ self.expected_xm_after_inserting_new_xm_with_fmt = """ 123456789 SzGeCERN Bar, Baz Foo On the quux and huux """ self.expected_hm_after_inserting_new_xm_with_fmt = """ 001__ 123456789 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux """ self.recid76_xm_before_all_the_tests = print_record(76, 'xm') self.recid76_hm_before_all_the_tests = print_record(76, 'hm') self.recid76_fmts = run_sql("""SELECT last_updated, value, format FROM bibfmt WHERE id_bibrec=76""") self.recid76_xm_with_fmt = """ 76 SzGeCERN HB Test. Here is some format value. Doe, John CERN On the foos and bars """ self.recid76_xm_with_fmt_only_first = """ 76 HB Test. Let us see if this gets inserted well. """ self.recid76_xm_with_fmt_only_second = """ 76 HB Test. Yet another test, to be run after the first one. HD Test. Let's see what will be stored in the detailed format field. """ def tearDown(self): """Helper function that restores recID 76 MARCXML, using the value saved before all the tests started to execute. (see self.recid76_xm_before_all_the_tests). Does not restore HB and HD formats. 
""" recs = bibupload.xml_marc_to_records(self.recid76_xm_before_all_the_tests) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='replace') for (last_updated, value, format) in self.recid76_fmts: run_sql("""UPDATE bibfmt SET last_updated=%s, value=%s WHERE id_bibrec=76 AND format=%s""", (last_updated, value, format)) inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, self.recid76_xm_before_all_the_tests), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.recid76_hm_before_all_the_tests), '') def test_inserting_new_record_containing_fmt_tag(self): """bibupload - FMT tag, inserting new record containing FMT tag""" recs = bibupload.xml_marc_to_records(self.new_xm_with_fmt) dummy, new_recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') xm_after = print_record(new_recid, 'xm') hm_after = print_record(new_recid, 'hm') hb_after = print_record(new_recid, 'hb') self.assertEqual(compare_xmbuffers(xm_after, self.expected_xm_after_inserting_new_xm_with_fmt.replace('123456789', str(new_recid))), '') self.assertEqual(compare_hmbuffers(hm_after, self.expected_hm_after_inserting_new_xm_with_fmt.replace('123456789', str(new_recid))), '') self.assertEqual(run_sql('SELECT last_updated from bibfmt WHERE id_bibrec=%s', (new_recid, ))[0][0], datetime.datetime(2008, 3, 14, 15, 14)) self.failUnless(hb_after.startswith("Test. Okay.")) def test_updating_existing_record_formats_in_format_mode(self): """bibupload - FMT tag, updating existing record via format mode""" xm_before = print_record(76, 'xm') hm_before = print_record(76, 'hm') # insert first format value: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt_only_first) bibupload.bibupload(recs[0], opt_mode='format') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') self.assertEqual(xm_after, xm_before) self.assertEqual(hm_after, hm_before) self.failUnless(hb_after.startswith("Test. Let us see if this gets inserted well.")) # now insert another format value and recheck: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt_only_second) bibupload.bibupload(recs[0], opt_mode='format') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') hd_after = print_record(76, 'hd') self.assertEqual(xm_after, xm_before) self.assertEqual(hm_after, hm_before) self.failUnless(hb_after.startswith("Test. Yet another test, to be run after the first one.")) self.failUnless(hd_after.startswith("Test. Let's see what will be stored in the detailed format field.")) def test_updating_existing_record_formats_in_correct_mode(self): """bibupload - FMT tag, updating existing record via correct mode""" xm_before = print_record(76, 'xm') hm_before = print_record(76, 'hm') # insert first format value: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt_only_first) bibupload.bibupload(recs[0], opt_mode='correct') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') self.assertEqual(compare_xmbuffers(xm_after, xm_before), '') self.assertEqual(compare_hmbuffers(hm_after, hm_before), '') self.failUnless(hb_after.startswith("Test. 
Let us see if this gets inserted well.")) # now insert another format value and recheck: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt_only_second) bibupload.bibupload(recs[0], opt_mode='correct') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') hd_after = print_record(76, 'hd') self.assertEqual(compare_xmbuffers(xm_after, xm_before), '') self.assertEqual(compare_hmbuffers(hm_after, hm_before), '') self.failUnless(hb_after.startswith("Test. Yet another test, to be run after the first one.")) self.failUnless(hd_after.startswith("Test. Let's see what will be stored in the detailed format field.")) def test_updating_existing_record_formats_in_replace_mode(self): """bibupload - FMT tag, updating existing record via replace mode""" # insert first format value: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt_only_first) bibupload.bibupload(recs[0], opt_mode='replace') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') self.assertEqual(compare_xmbuffers(xm_after, '76'), '') self.assertEqual(compare_hmbuffers(hm_after, '000000076 001__ 76'), '') self.failUnless(hb_after.startswith("Test. Let us see if this gets inserted well.")) # now insert another format value and recheck: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt_only_second) bibupload.bibupload(recs[0], opt_mode='replace') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') hd_after = print_record(76, 'hd') self.assertEqual(compare_xmbuffers(xm_after, """ 76 """), '') self.assertEqual(compare_hmbuffers(hm_after, '000000076 001__ 76'), '') self.failUnless(hb_after.startswith("Test. Yet another test, to be run after the first one.")) self.failUnless(hd_after.startswith("Test. Let's see what will be stored in the detailed format field.")) # final insertion and recheck: recs = bibupload.xml_marc_to_records(self.recid76_xm_with_fmt) bibupload.bibupload(recs[0], opt_mode='replace') xm_after = print_record(76, 'xm') hm_after = print_record(76, 'hm') hb_after = print_record(76, 'hb') hd_after = print_record(76, 'hd') self.assertEqual(compare_xmbuffers(xm_after, """ 76 SzGeCERN Doe, John CERN On the foos and bars """), '') self.assertEqual(compare_hmbuffers(hm_after, """ 001__ 76 003__ SzGeCERN 100__ $$aDoe, John$$uCERN 245__ $$aOn the foos and bars """), '') self.failUnless(hb_after.startswith("Test. Here is some format value.")) self.failUnless(hd_after.startswith("Test. Let's see what will be stored in the detailed format field.")) class BibUploadRecordsWithSYSNOTest(GenericBibUploadTest): """Testing uploading of records that have external SYSNO present.""" def setUp(self): # pylint: disable=C0103 """Initialize the MARCXML test records.""" GenericBibUploadTest.setUp(self) # Note that SYSNO fields are repeated but with different # subfields, this is to test whether bibupload would not # mistakenly pick up wrong values. 
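# Editor's note -- illustrative sketch, not part of the patch.  The
# %-dictionaries below all slice the same 6-character tag specification
# (CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG; '035__a' is only an assumed example
# value, the real one depends on the site configuration):
#
#     spec[0:3] -> MARC tag                              ('035')
#     spec[3:4] -> ind1, '_' standing for a blank indicator
#     spec[4:5] -> ind2, idem
#     spec[5:6] -> subfield code holding the external id ('a')
#
# which is why each buffer tests spec[3:4] != "_" before using it.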
self.xm_testrec1 = """ 123456789 SzGeCERN Bar, Baz Foo On the quux and huux 1 sysno1 sysno2 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or " ", 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or " ", 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } self.hm_testrec1 = """ 001__ 123456789 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 1 %(sysnotag)s%(sysnoind1)s%(sysnoind2)s $$%(sysnosubfieldcode)ssysno1 %(sysnotag)s%(sysnoind1)s%(sysnoind2)s $$0sysno2 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4], 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5], 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } self.xm_testrec1_to_update = """ SzGeCERN Bar, Baz Foo On the quux and huux 1 Updated sysno1 sysno2 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or " ", 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or " ", 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } self.xm_testrec1_updated = """ 123456789 SzGeCERN Bar, Baz Foo On the quux and huux 1 Updated sysno1 sysno2 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or " ", 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or " ", 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } self.hm_testrec1_updated = """ 001__ 123456789 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 1 Updated %(sysnotag)s%(sysnoind1)s%(sysnoind2)s $$%(sysnosubfieldcode)ssysno1 %(sysnotag)s%(sysnoind1)s%(sysnoind2)s $$0sysno2 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4], 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5], 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } self.xm_testrec2 = """ 987654321 SzGeCERN Bar, Baz Foo On the quux and huux 2 sysno2 sysno1 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4] or " ", 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5] or " ", 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } self.hm_testrec2 = """ 001__ 987654321 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 2 %(sysnotag)s%(sysnoind1)s%(sysnoind2)s $$%(sysnosubfieldcode)ssysno2 %(sysnotag)s%(sysnoind1)s%(sysnoind2)s $$0sysno1 """ % {'sysnotag': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[0:3], 'sysnoind1': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[3:4], 'sysnoind2': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[4:5], 'sysnosubfieldcode': CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG[5:6], } def test_insert_the_same_sysno_record(self): """bibupload - SYSNO tag, refuse to insert the same SYSNO record""" # initialize bibupload mode: if self.verbose: print "test_insert_the_same_sysno_record() started" # insert record 1 first time: testrec_to_insert_first = self.xm_testrec1.replace('123456789', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err1, recid1, msg1 = bibupload.bibupload(recs[0], 
opt_mode='insert') inserted_xm = print_record(recid1, 'xm') inserted_hm = print_record(recid1, 'hm') # use real recID when comparing whether it worked: self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1)) self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '') # insert record 2 first time: testrec_to_insert_first = self.xm_testrec2.replace('987654321', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid2, 'xm') inserted_hm = print_record(recid2, 'hm') # use real recID when comparing whether it worked: self.xm_testrec2 = self.xm_testrec2.replace('987654321', str(recid2)) self.hm_testrec2 = self.hm_testrec2.replace('987654321', str(recid2)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec2), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec2), '') # try to insert updated record 1, it should fail: recs = bibupload.xml_marc_to_records(self.xm_testrec1_to_update) err1_updated, recid1_updated, msg1_updated = bibupload.bibupload(recs[0], opt_mode='insert') self.assertEqual(-1, recid1_updated) if self.verbose: print "test_insert_the_same_sysno_record() finished" def test_insert_or_replace_the_same_sysno_record(self): """bibupload - SYSNO tag, allow to insert or replace the same SYSNO record""" # initialize bibupload mode: if self.verbose: print "test_insert_or_replace_the_same_sysno_record() started" # insert/replace record 1 first time: testrec_to_insert_first = self.xm_testrec1.replace('123456789', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='replace_or_insert') inserted_xm = print_record(recid1, 'xm') inserted_hm = print_record(recid1, 'hm') # use real recID in test buffers when comparing whether it worked: self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1)) self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '') # try to insert/replace updated record 1, it should be okay: recs = bibupload.xml_marc_to_records(self.xm_testrec1_to_update) err1_updated, recid1_updated, msg1_updated = bibupload.bibupload(recs[0], opt_mode='replace_or_insert') inserted_xm = print_record(recid1_updated, 'xm') inserted_hm = print_record(recid1_updated, 'hm') self.assertEqual(recid1, recid1_updated) # use real recID in test buffers when comparing whether it worked: self.xm_testrec1_updated = self.xm_testrec1_updated.replace('123456789', str(recid1)) self.hm_testrec1_updated = self.hm_testrec1_updated.replace('123456789', str(recid1)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1_updated), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1_updated), '') if self.verbose: print "test_insert_or_replace_the_same_sysno_record() finished" def test_replace_nonexisting_sysno_record(self): """bibupload - SYSNO tag, refuse to replace non-existing SYSNO record""" # initialize bibupload mode: if self.verbose: print "test_replace_nonexisting_sysno_record() started" # insert record 1 first time: testrec_to_insert_first = self.xm_testrec1.replace('123456789', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err1, 
recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='replace_or_insert') inserted_xm = print_record(recid1, 'xm') inserted_hm = print_record(recid1, 'hm') # use real recID in test buffers when comparing whether it worked: self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1)) self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '') # try to replace record 2 it should fail: testrec_to_insert_first = self.xm_testrec2.replace('987654321', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='replace') self.assertEqual(-1, recid2) if self.verbose: print "test_replace_nonexisting_sysno_record() finished" class BibUploadRecordsWithEXTOAIIDTest(GenericBibUploadTest): """Testing uploading of records that have external EXTOAIID present.""" def setUp(self): # pylint: disable=C0103 """Initialize the MARCXML test records.""" GenericBibUploadTest.setUp(self) # Note that EXTOAIID fields are repeated but with different # subfields, this is to test whether bibupload would not # mistakenly pick up wrong values. self.xm_testrec1 = """ 123456789 SzGeCERN extoaiid1 extoaisrc1 extoaiid2 Bar, Baz Foo On the quux and huux 1 """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or " ", 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or " ", 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } self.hm_testrec1 = """ 001__ 123456789 003__ SzGeCERN %(extoaiidtag)s%(extoaiidind1)s%(extoaiidind2)s $$%(extoaisrcsubfieldcode)sextoaisrc1$$%(extoaiidsubfieldcode)sextoaiid1 %(extoaiidtag)s%(extoaiidind1)s%(extoaiidind2)s $$0extoaiid2 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 1 """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4], 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5], 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } self.xm_testrec1_to_update = """ SzGeCERN extoaiid1 extoaisrc1 extoaiid2 Bar, Baz Foo On the quux and huux 1 Updated """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or " ", 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or " ", 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } self.xm_testrec1_updated = """ 123456789 SzGeCERN extoaiid1 extoaisrc1 extoaiid2 Bar, Baz Foo On the quux and huux 1 Updated """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or " ", 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or " ", 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } self.hm_testrec1_updated = """ 001__ 123456789 003__ SzGeCERN 
%(extoaiidtag)s%(extoaiidind1)s%(extoaiidind2)s $$%(extoaisrcsubfieldcode)sextoaisrc1$$%(extoaiidsubfieldcode)sextoaiid1 %(extoaiidtag)s%(extoaiidind1)s%(extoaiidind2)s $$0extoaiid2 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 1 Updated """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4], 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5], 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } self.xm_testrec2 = """ 987654321 SzGeCERN extoaiid2 extoaisrc1 extoaiid1 Bar, Baz Foo On the quux and huux 2 """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or " ", 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or " ", 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } self.hm_testrec2 = """ 001__ 987654321 003__ SzGeCERN %(extoaiidtag)s%(extoaiidind1)s%(extoaiidind2)s $$%(extoaisrcsubfieldcode)sextoaisrc1$$%(extoaiidsubfieldcode)sextoaiid2 %(extoaiidtag)s%(extoaiidind1)s%(extoaiidind2)s $$0extoaiid1 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 2 """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4], 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5], 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'extoaisrcsubfieldcode' : CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG[5:6], } def test_insert_the_same_extoaiid_record(self): """bibupload - EXTOAIID tag, refuse to insert the same EXTOAIID record""" # initialize bibupload mode: if self.verbose: print "test_insert_the_same_extoaiid_record() started" # insert record 1 first time: testrec_to_insert_first = self.xm_testrec1.replace('123456789', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid1, 'xm') inserted_hm = print_record(recid1, 'hm') # use real recID when comparing whether it worked: self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1)) self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '') # insert record 2 first time: testrec_to_insert_first = self.xm_testrec2.replace('987654321', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid2, 'xm') inserted_hm = print_record(recid2, 'hm') # use real recID when comparing whether it worked: self.xm_testrec2 = self.xm_testrec2.replace('987654321', str(recid2)) self.hm_testrec2 = self.hm_testrec2.replace('987654321', str(recid2)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec2), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec2), '') # try to insert updated record 1, it should fail: recs = bibupload.xml_marc_to_records(self.xm_testrec1_to_update) err1_updated, recid1_updated, msg1_updated = bibupload.bibupload(recs[0], opt_mode='insert') self.assertEqual(-1, recid1_updated) if self.verbose: print "test_insert_the_same_extoaiid_record() finished" def 
test_insert_or_replace_the_same_extoaiid_record(self):
        """bibupload - EXTOAIID tag, allow to insert or replace the same EXTOAIID record"""
        # initialize bibupload mode:
        if self.verbose:
            print "test_insert_or_replace_the_same_extoaiid_record() started"
        # insert/replace record 1 first time:
        testrec_to_insert_first = self.xm_testrec1.replace('123456789', '')
        recs = bibupload.xml_marc_to_records(testrec_to_insert_first)
        err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='replace_or_insert')
        inserted_xm = print_record(recid1, 'xm')
        inserted_hm = print_record(recid1, 'hm')
        # use real recID in test buffers when comparing whether it worked:
        self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1))
        self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '')
        # try to insert/replace updated record 1, it should be okay:
        recs = bibupload.xml_marc_to_records(self.xm_testrec1_to_update)
        err1_updated, recid1_updated, msg1_updated = bibupload.bibupload(recs[0], opt_mode='replace_or_insert')
        inserted_xm = print_record(recid1_updated, 'xm')
        inserted_hm = print_record(recid1_updated, 'hm')
        self.assertEqual(recid1, recid1_updated)
        # use real recID in test buffers when comparing whether it worked:
        self.xm_testrec1_updated = self.xm_testrec1_updated.replace('123456789', str(recid1))
        self.hm_testrec1_updated = self.hm_testrec1_updated.replace('123456789', str(recid1))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1_updated), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1_updated), '')
        if self.verbose:
            print "test_insert_or_replace_the_same_extoaiid_record() finished"

    def test_replace_nonexisting_extoaiid_record(self):
        """bibupload - EXTOAIID tag, refuse to replace non-existing EXTOAIID record"""
        # initialize bibupload mode:
        if self.verbose:
            print "test_replace_nonexisting_extoaiid_record() started"
        # insert record 1 first time:
        testrec_to_insert_first = self.xm_testrec1.replace('123456789', '')
        recs = bibupload.xml_marc_to_records(testrec_to_insert_first)
        err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='replace_or_insert')
        inserted_xm = print_record(recid1, 'xm')
        inserted_hm = print_record(recid1, 'hm')
        # use real recID in test buffers when comparing whether it worked:
        self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1))
        self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '')
        # try to replace record 2, it should fail:
        testrec_to_insert_first = self.xm_testrec2.replace('987654321', '')
        recs = bibupload.xml_marc_to_records(testrec_to_insert_first)
        err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='replace')
        self.assertEqual(-1, recid2)
        if self.verbose:
            print "test_replace_nonexisting_extoaiid_record() finished"

class BibUploadRecordsWithOAIIDTest(GenericBibUploadTest):
    """Testing uploading of records that have OAI ID present."""

    def setUp(self):
        """Initialize the MARCXML test records."""
        # Note that OAI fields are repeated but with different
        # subfields, this is to test whether bibupload would not
        # mistakenly pick up wrong values.
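        # The OAI ID field is configured as one 6-character string,
        # sliced the same way everywhere in these tests: characters
        # [0:3] give the MARC tag, [3:4] and [4:5] the two indicators
        # ("_" stands for a blank indicator, rendered as " " in MARCXML
        # but kept as "_" in the textual MARC buffers), and [5:6] the
        # subfield code.  A minimal sketch of that convention (the
        # sample value is illustrative only, not necessarily your
        # installation's CFG_OAI_ID_FIELD):
        #
        #   CFG_OAI_ID_FIELD = "909COo"
        #   tag  = CFG_OAI_ID_FIELD[0:3]                                   # "909"
        #   ind1 = CFG_OAI_ID_FIELD[3:4] != "_" and CFG_OAI_ID_FIELD[3:4] or " "  # "C"
        #   ind2 = CFG_OAI_ID_FIELD[4:5] != "_" and CFG_OAI_ID_FIELD[4:5] or " "  # "O"
        #   code = CFG_OAI_ID_FIELD[5:6]                                   # "o"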
GenericBibUploadTest.setUp(self) self.xm_testrec1 = """ 123456789 SzGeCERN Bar, Baz Foo On the quux and huux 1 oai:foo:1 oai:foo:2 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or " ", 'oaiind2': CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or " ", 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } self.hm_testrec1 = """ 001__ 123456789 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 1 %(oaitag)s%(oaiind1)s%(oaiind2)s $$%(oaisubfieldcode)soai:foo:1 %(oaitag)s%(oaiind1)s%(oaiind2)s $$0oai:foo:2 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4], 'oaiind2': CFG_OAI_ID_FIELD[4:5], 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } self.xm_testrec1_to_update = """ SzGeCERN Bar, Baz Foo On the quux and huux 1 Updated oai:foo:1 oai:foo:2 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or " ", 'oaiind2': CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or " ", 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } self.xm_testrec1_updated = """ 123456789 SzGeCERN Bar, Baz Foo On the quux and huux 1 Updated oai:foo:1 oai:foo:2 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or " ", 'oaiind2': CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or " ", 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } self.hm_testrec1_updated = """ 001__ 123456789 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 1 Updated %(oaitag)s%(oaiind1)s%(oaiind2)s $$%(oaisubfieldcode)soai:foo:1 %(oaitag)s%(oaiind1)s%(oaiind2)s $$0oai:foo:2 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4], 'oaiind2': CFG_OAI_ID_FIELD[4:5], 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } self.xm_testrec2 = """ 987654321 SzGeCERN Bar, Baz Foo On the quux and huux 2 oai:foo:2 oai:foo:1 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4] != "_" and \ CFG_OAI_ID_FIELD[3:4] or " ", 'oaiind2': CFG_OAI_ID_FIELD[4:5] != "_" and \ CFG_OAI_ID_FIELD[4:5] or " ", 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } self.hm_testrec2 = """ 001__ 987654321 003__ SzGeCERN 100__ $$aBar, Baz$$uFoo 245__ $$aOn the quux and huux 2 %(oaitag)s%(oaiind1)s%(oaiind2)s $$%(oaisubfieldcode)soai:foo:2 %(oaitag)s%(oaiind1)s%(oaiind2)s $$0oai:foo:1 """ % {'oaitag': CFG_OAI_ID_FIELD[0:3], 'oaiind1': CFG_OAI_ID_FIELD[3:4], 'oaiind2': CFG_OAI_ID_FIELD[4:5], 'oaisubfieldcode': CFG_OAI_ID_FIELD[5:6], } def test_insert_the_same_oai_record(self): """bibupload - OAIID tag, refuse to insert the same OAI record""" # insert record 1 first time: testrec_to_insert_first = self.xm_testrec1.replace('123456789', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid1, 'xm') inserted_hm = print_record(recid1, 'hm') # use real recID when comparing whether it worked: self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1)) self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1)) self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '') # insert record 2 first time: testrec_to_insert_first = self.xm_testrec2.replace('987654321', '') recs = bibupload.xml_marc_to_records(testrec_to_insert_first) err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid2, 'xm') 
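        # NB: print_record() is queried in two output formats throughout
        # these tests: 'xm' yields the stored record as MARCXML, while
        # 'hm' yields the textual (HTML) MARC listing that the hm_* test
        # buffers mimic.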
        inserted_hm = print_record(recid2, 'hm')
        # use real recID when comparing whether it worked:
        self.xm_testrec2 = self.xm_testrec2.replace('987654321', str(recid2))
        self.hm_testrec2 = self.hm_testrec2.replace('987654321', str(recid2))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec2), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec2), '')
        # try to insert updated record 1, it should fail:
        recs = bibupload.xml_marc_to_records(self.xm_testrec1_to_update)
        err1_updated, recid1_updated, msg1_updated = bibupload.bibupload(recs[0], opt_mode='insert')
        self.assertEqual(-1, recid1_updated)

    def test_insert_or_replace_the_same_oai_record(self):
        """bibupload - OAIID tag, allow to insert or replace the same OAI record"""
        # insert/replace record 1 first time:
        testrec_to_insert_first = self.xm_testrec1.replace('123456789', '')
        recs = bibupload.xml_marc_to_records(testrec_to_insert_first)
        err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='replace_or_insert')
        inserted_xm = print_record(recid1, 'xm')
        inserted_hm = print_record(recid1, 'hm')
        # use real recID in test buffers when comparing whether it worked:
        self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1))
        self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '')
        # try to insert/replace updated record 1, it should be okay:
        recs = bibupload.xml_marc_to_records(self.xm_testrec1_to_update)
        err1_updated, recid1_updated, msg1_updated = bibupload.bibupload(recs[0], opt_mode='replace_or_insert')
        inserted_xm = print_record(recid1_updated, 'xm')
        inserted_hm = print_record(recid1_updated, 'hm')
        self.assertEqual(recid1, recid1_updated)
        # use real recID in test buffers when comparing whether it worked:
        self.xm_testrec1_updated = self.xm_testrec1_updated.replace('123456789', str(recid1))
        self.hm_testrec1_updated = self.hm_testrec1_updated.replace('123456789', str(recid1))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1_updated), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1_updated), '')

    def test_replace_nonexisting_oai_record(self):
        """bibupload - OAIID tag, refuse to replace non-existing OAI record"""
        # insert record 1 first time:
        testrec_to_insert_first = self.xm_testrec1.replace('123456789', '')
        recs = bibupload.xml_marc_to_records(testrec_to_insert_first)
        err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='replace_or_insert')
        inserted_xm = print_record(recid1, 'xm')
        inserted_hm = print_record(recid1, 'hm')
        # use real recID in test buffers when comparing whether it worked:
        self.xm_testrec1 = self.xm_testrec1.replace('123456789', str(recid1))
        self.hm_testrec1 = self.hm_testrec1.replace('123456789', str(recid1))
        self.assertEqual(compare_xmbuffers(inserted_xm, self.xm_testrec1), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.hm_testrec1), '')
        # try to replace record 2, it should fail:
        testrec_to_insert_first = self.xm_testrec2.replace('987654321', '')
        recs = bibupload.xml_marc_to_records(testrec_to_insert_first)
        err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='replace')
        self.assertEqual(-1, recid2)

class BibUploadIndicatorsTest(GenericBibUploadTest):
    """
    Testing uploading of a MARCXML record with indicators having either
    blank space (as per MARC schema) or empty string value (old behaviour).
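    Both spellings of a blank indicator must upload cleanly, and both
    test records below are expected to yield the same textual MARC
    ('hm') output, where blank indicators are rendered as underscores.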
""" def setUp(self): """Initialize the MARCXML test record.""" GenericBibUploadTest.setUp(self) self.testrec1_xm = """ SzGeCERN Test, John Test University """ self.testrec1_hm = """ 003__ SzGeCERN 100__ $$aTest, John$$uTest University """ self.testrec2_xm = """ SzGeCERN Test, John Test University """ self.testrec2_hm = """ 003__ SzGeCERN 100__ $$aTest, John$$uTest University """ def test_record_with_spaces_in_indicators(self): """bibupload - inserting MARCXML with spaces in indicators""" recs = bibupload.xml_marc_to_records(self.testrec1_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(remove_tag_001_from_xmbuffer(inserted_xm), self.testrec1_xm), '') self.assertEqual(compare_hmbuffers(remove_tag_001_from_hmbuffer(inserted_hm), self.testrec1_hm), '') def test_record_with_no_spaces_in_indicators(self): """bibupload - inserting MARCXML with no spaces in indicators""" recs = bibupload.xml_marc_to_records(self.testrec2_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(remove_tag_001_from_xmbuffer(inserted_xm), self.testrec2_xm), '') self.assertEqual(compare_hmbuffers(remove_tag_001_from_hmbuffer(inserted_hm), self.testrec2_hm), '') class BibUploadUpperLowerCaseTest(GenericBibUploadTest): """ Testing treatment of similar records with only upper and lower case value differences in the bibxxx table. """ def setUp(self): """Initialize the MARCXML test records.""" GenericBibUploadTest.setUp(self) self.testrec1_xm = """ SzGeCERN Test, John Test University """ self.testrec1_hm = """ 003__ SzGeCERN 100__ $$aTest, John$$uTest University """ self.testrec2_xm = """ SzGeCERN TeSt, JoHn Test UniVeRsity """ self.testrec2_hm = """ 003__ SzGeCERN 100__ $$aTeSt, JoHn$$uTest UniVeRsity """ def test_record_with_upper_lower_case_letters(self): """bibupload - inserting similar MARCXML records with upper/lower case""" # insert test record #1: recs = bibupload.xml_marc_to_records(self.testrec1_xm) err1, recid1, msg1 = bibupload.bibupload(recs[0], opt_mode='insert') recid1_inserted_xm = print_record(recid1, 'xm') recid1_inserted_hm = print_record(recid1, 'hm') # insert test record #2: recs = bibupload.xml_marc_to_records(self.testrec2_xm) err2, recid2, msg2 = bibupload.bibupload(recs[0], opt_mode='insert') recid2_inserted_xm = print_record(recid2, 'xm') recid2_inserted_hm = print_record(recid2, 'hm') # let us compare stuff now: self.assertEqual(compare_xmbuffers(remove_tag_001_from_xmbuffer(recid1_inserted_xm), self.testrec1_xm), '') self.assertEqual(compare_hmbuffers(remove_tag_001_from_hmbuffer(recid1_inserted_hm), self.testrec1_hm), '') self.assertEqual(compare_xmbuffers(remove_tag_001_from_xmbuffer(recid2_inserted_xm), self.testrec2_xm), '') self.assertEqual(compare_hmbuffers(remove_tag_001_from_hmbuffer(recid2_inserted_hm), self.testrec2_hm), '') class BibUploadControlledProvenanceTest(GenericBibUploadTest): """Testing treatment of tags under controlled provenance in the correct mode.""" def setUp(self): """Initialize the MARCXML test record.""" GenericBibUploadTest.setUp(self) self.testrec1_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Test title blabla sam blublu sim human """ self.testrec1_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 245__ $$aTest title 6531_ $$9sam$$ablabla 6531_ $$9sim$$ablublu 6531_ 
$$ahuman """ self.testrec1_xm_to_correct = """ 123456789 bleble sim bloblo som """ self.testrec1_corrected_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Test title blabla sam human bleble sim bloblo som """ self.testrec1_corrected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 245__ $$aTest title 6531_ $$9sam$$ablabla 6531_ $$ahuman 6531_ $$9sim$$ableble 6531_ $$9som$$abloblo """ # insert test record: test_record_xm = self.testrec1_xm.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_record_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recID: self.testrec1_xm = self.testrec1_xm.replace('123456789', str(recid)) self.testrec1_hm = self.testrec1_hm.replace('123456789', str(recid)) self.testrec1_xm_to_correct = self.testrec1_xm_to_correct.replace('123456789', str(recid)) self.testrec1_corrected_xm = self.testrec1_corrected_xm.replace('123456789', str(recid)) self.testrec1_corrected_hm = self.testrec1_corrected_hm.replace('123456789', str(recid)) # test of the inserted record: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, self.testrec1_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, self.testrec1_hm), '') def test_controlled_provenance_persistence(self): """bibupload - correct mode, tags with controlled provenance""" # correct metadata tags; will the protected tags be kept? recs = bibupload.xml_marc_to_records(self.testrec1_xm_to_correct) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='correct') corrected_xm = print_record(recid, 'xm') corrected_hm = print_record(recid, 'hm') # did it work? self.assertEqual(compare_xmbuffers(corrected_xm, self.testrec1_corrected_xm), '') self.assertEqual(compare_hmbuffers(corrected_hm, self.testrec1_corrected_hm), '') class BibUploadStrongTagsTest(GenericBibUploadTest): """Testing treatment of strong tags and the replace mode.""" def setUp(self): """Initialize the MARCXML test record.""" GenericBibUploadTest.setUp(self) self.testrec1_xm = """ 123456789 SzGeCERN Test, Jane Test Institute Test title A value Another value """ % {'strong_tag': bibupload.CFG_BIBUPLOAD_STRONG_TAGS[0]} self.testrec1_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, Jane$$uTest Institute 245__ $$aTest title %(strong_tag)s__ $$aA value$$bAnother value """ % {'strong_tag': bibupload.CFG_BIBUPLOAD_STRONG_TAGS[0]} self.testrec1_xm_to_replace = """ 123456789 Test, Joseph Test Academy """ self.testrec1_replaced_xm = """ 123456789 Test, Joseph Test Academy A value Another value """ % {'strong_tag': bibupload.CFG_BIBUPLOAD_STRONG_TAGS[0]} self.testrec1_replaced_hm = """ 001__ 123456789 100__ $$aTest, Joseph$$uTest Academy %(strong_tag)s__ $$aA value$$bAnother value """ % {'strong_tag': bibupload.CFG_BIBUPLOAD_STRONG_TAGS[0]} # insert test record: test_record_xm = self.testrec1_xm.replace('123456789', '') recs = bibupload.xml_marc_to_records(test_record_xm) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recID: self.testrec1_xm = self.testrec1_xm.replace('123456789', str(recid)) self.testrec1_hm = self.testrec1_hm.replace('123456789', str(recid)) self.testrec1_xm_to_replace = self.testrec1_xm_to_replace.replace('123456789', str(recid)) self.testrec1_replaced_xm = self.testrec1_replaced_xm.replace('123456789', str(recid)) self.testrec1_replaced_hm = self.testrec1_replaced_hm.replace('123456789', str(recid)) # test of the inserted 
record:
        inserted_xm = print_record(recid, 'xm')
        inserted_hm = print_record(recid, 'hm')
        self.assertEqual(compare_xmbuffers(inserted_xm, self.testrec1_xm), '')
        self.assertEqual(compare_hmbuffers(inserted_hm, self.testrec1_hm), '')

    def test_strong_tags_persistence(self):
        """bibupload - strong tags, persistence in replace mode"""
        # replace all metadata tags; will the strong tags be kept?
        recs = bibupload.xml_marc_to_records(self.testrec1_xm_to_replace)
        err, recid, msg = bibupload.bibupload(recs[0], opt_mode='replace')
        replaced_xm = print_record(recid, 'xm')
        replaced_hm = print_record(recid, 'hm')
        # did it work?
        self.assertEqual(compare_xmbuffers(replaced_xm, self.testrec1_replaced_xm), '')
        self.assertEqual(compare_hmbuffers(replaced_hm, self.testrec1_replaced_hm), '')

class BibUploadPretendTest(GenericBibUploadTest):
    """
    Testing bibupload --pretend correctness.
    """
    def setUp(self):
        GenericBibUploadTest.setUp(self)
        self.demo_data = bibupload.xml_marc_to_records(open(os.path.join(CFG_TMPDIR, 'demobibdata.xml')).read())[0]
        self.before = self._get_tables_fingerprint()
        task_set_task_param('pretend', True)

    def tearDown(self):
        task_set_task_param('pretend', False)

    def _get_tables_fingerprint():
        """
        Take length and last modification time of all the tables that
        might be touched by bibupload and return them in a nice structure.
        """
        fingerprint = {}
        tables = ['bibrec', 'bibdoc', 'bibrec_bibdoc', 'bibdoc_bibdoc', 'bibfmt', 'hstDOCUMENT', 'hstRECORD']
        for i in xrange(100):
            tables.append('bib%02dx' % i)
            tables.append('bibrec_bib%02dx' % i)
        for table in tables:
            fingerprint[table] = get_table_status_info(table)
        return fingerprint
    _get_tables_fingerprint = staticmethod(_get_tables_fingerprint)

    def _checks_tables_fingerprints(before, after):
        """
        Checks differences in table fingerprints.
        """
        err = True
        for table in before.keys():
            if before[table] != after[table]:
                print >> sys.stderr, "Table %s has been modified: before was [%s], after was [%s]" % (table, pprint.pformat(before[table]), pprint.pformat(after[table]))
                err = False
        return err
    _checks_tables_fingerprints = staticmethod(_checks_tables_fingerprints)

    def test_pretend_insert(self):
        """bibupload - pretend insert"""
        bibupload.bibupload(self.demo_data, opt_mode='insert', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_correct(self):
        """bibupload - pretend correct"""
        bibupload.bibupload(self.demo_data, opt_mode='correct', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_replace(self):
        """bibupload - pretend replace"""
        bibupload.bibupload(self.demo_data, opt_mode='replace', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_append(self):
        """bibupload - pretend append"""
        bibupload.bibupload(self.demo_data, opt_mode='append', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_replace_or_insert(self):
        """bibupload - pretend replace or insert"""
        bibupload.bibupload(self.demo_data, opt_mode='replace_or_insert', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_holdingpen(self):
        """bibupload - pretend holdingpen"""
        bibupload.bibupload(self.demo_data, opt_mode='holdingpen', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_delete(self):
        """bibupload - pretend delete"""
        bibupload.bibupload(self.demo_data, opt_mode='delete', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

    def test_pretend_reference(self):
        """bibupload - pretend reference"""
        bibupload.bibupload(self.demo_data, opt_mode='reference', pretend=True)
        self.failUnless(self._checks_tables_fingerprints(self.before, self._get_tables_fingerprint()))

class BibUploadHoldingPenTest(GenericBibUploadTest):
    """
    Testing the Holding Pen usage.
""" def setUp(self): GenericBibUploadTest.setUp(self) self.verbose = 9 setup_loggers() task_set_task_param('verbose', self.verbose) self.recid = 10 self.oai_id = "oai:cds.cern.ch:CERN-EP-2001-094" def test_holding_pen_upload_with_recid(self): """bibupload - holding pen upload with recid""" test_to_upload = """ %s Kleefeld, F Newcomer, Y Rupp, G Scadron, M D """ % self.recid recs = bibupload.xml_marc_to_records(test_to_upload) bibupload.insert_record_into_holding_pen(recs[0], "") res = run_sql("SELECT changeset_xml FROM bibHOLDINGPEN WHERE id_bibrec=%s", (self.recid, )) self.failUnless("Rupp, G" in res[0][0]) def test_holding_pen_upload_with_oai_id(self): """bibupload - holding pen upload with oai_id""" test_to_upload = """ Kleefeld, F Newcomer, Y Rupp, G Scadron, M D %(value)s """ % {'extoaiidtag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'extoaiidind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3:4] or " ", 'extoaiidind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] != "_" and \ CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4:5] or " ", 'extoaiidsubfieldcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5:6], 'value': self.oai_id } recs = bibupload.xml_marc_to_records(test_to_upload) bibupload.insert_record_into_holding_pen(recs[0], self.oai_id) res = run_sql("SELECT changeset_xml FROM bibHOLDINGPEN WHERE id_bibrec=%s AND oai_id=%s", (self.recid, self.oai_id)) self.failUnless("Rupp, G" in res[0][0]) def tearDown(self): GenericBibUploadTest.tearDown(self) run_sql("DELETE FROM bibHOLDINGPEN WHERE id_bibrec=%s", (self.recid, )) class BibUploadFFTModeTest(GenericBibUploadTest): """ Testing treatment of fulltext file transfer import mode. """ def _test_bibdoc_status(self, recid, docname, status): res = run_sql('SELECT bd.status FROM bibrec_bibdoc as bb JOIN bibdoc as bd ON bb.id_bibdoc = bd.id WHERE bb.id_bibrec = %s AND bd.docname = %s', (recid, docname)) self.failUnless(res) self.assertEqual(status, res[0][0]) def test_writing_rights(self): """bibupload - FFT has writing rights""" self.failUnless(bibupload.writing_rights_p()) def test_simple_fft_insert(self): """bibupload - simple FFT insert""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, 
testrec_expected_hm), '') self.failUnless(try_url_download(testrec_expected_url)) def test_fft_insert_with_valid_embargo(self): """bibupload - FFT insert with valid embargo""" # define the test case: import time future_date = time.strftime('%Y-%m-%d', time.gmtime(time.time() + 24 * 3600 * 2)) test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif firerole: deny until '%(future_date)s' allow any """ % { 'future_date': future_date, 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') result = urlopen(testrec_expected_url).read() self.failUnless("This file is restricted." 
in result, result) def test_fft_insert_with_expired_embargo(self): """bibupload - FFT insert with expired embargo""" # define the test case: import time past_date = time.strftime('%Y-%m-%d', time.gmtime(time.time() - 24 * 3600 * 2)) test_to_upload = """ SzGeCERN Test, John Test University ARTICLE %(siteurl)s/img/site_logo.gif firerole: deny until '%(past_date)s' allow any """ % { 'past_date': past_date, 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif ARTICLE """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif 980__ $$aARTICLE """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') result = urlopen(testrec_expected_url).read() self.failIf("If you already have an account, please login using the form below." 
in result, result) self.assertEqual(test_web_page_content(testrec_expected_url, 'hyde', 'h123yde', expected_text='Authorization failure'), []) force_webcoll(recid) self.assertEqual(test_web_page_content(testrec_expected_url, 'hyde', 'h123yde', expected_text=urlopen("%(siteurl)s/img/site_logo.gif" % { 'siteurl': CFG_SITE_URL }).read()), []) def test_exotic_format_fft_append(self): """bibupload - exotic format FFT append""" # define the test case: testfile = os.path.join(CFG_TMPDIR, 'test.ps.Z') open(testfile, 'w').write('TEST') email_tag = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][0:3] email_ind1 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][3] email_ind2 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][4] email_code = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][5] test_to_upload = """ SzGeCERN Test, John Test University jekyll@cds.cern.ch """ % { 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} testrec_to_append = """ 123456789 %s """ % testfile testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University jekyll@cds.cern.ch %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/test.ps.Z """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University %(email_tag)s%(email_ind1)s%(email_ind2)s $$%(email_code)sjekyll@cds.cern.ch 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/test.ps.Z """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == ' ' and '_' or email_ind1, 'email_ind2': email_ind2 == ' ' and '_' or email_ind2, 'email_code': email_code} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/test.ps.Z" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url2 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/test?format=ps.Z" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_to_append = testrec_to_append.replace('123456789', str(recid)) testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) testrec_expected_url2 = testrec_expected_url.replace('123456789', str(recid)) recs = bibupload.xml_marc_to_records(testrec_to_append) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='append') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self.assertEqual(test_web_page_content(testrec_expected_url, 'jekyll', 'j123ekyll', expected_text='TEST'), []) self.assertEqual(test_web_page_content(testrec_expected_url2, 'jekyll', 'j123ekyll', expected_text='TEST'), []) def test_fft_check_md5_through_bibrecdoc_str(self): """bibupload - simple FFT insert, check md5 through BibRecDocs.str()""" # define 
the test case: test_to_upload = """ SzGeCERN Test, John Test University %s/img/head.gif """ % CFG_SITE_URL # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') original_md5 = md5(urlopen('%s/img/head.gif' % CFG_SITE_URL).read()).hexdigest() bibrec_str = str(BibRecDocs(int(recid))) md5_found = False for row in bibrec_str.split('\n'): if 'checksum' in row: if original_md5 in row: md5_found = True self.failUnless(md5_found) def test_detailed_fft_insert(self): """bibupload - detailed FFT insert""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif SuperMain This is a description This is a comment CIDIESSE %(siteurl)s/img/rss.png SuperMain .jpeg This is a description This is a second comment CIDIESSE """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/CIDIESSE.gif This is a description This is a comment %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/CIDIESSE.jpeg This is a description This is a second comment """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/CIDIESSE.gif$$yThis is a description$$zThis is a comment 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/CIDIESSE.jpeg$$yThis is a description$$zThis is a second comment """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url1 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/CIDIESSE.gif" % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url2 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/CIDIESSE.jpeg" % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url1 = testrec_expected_url1.replace('123456789', str(recid)) testrec_expected_url2 = testrec_expected_url1.replace('123456789', str(recid)) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self.failUnless(try_url_download(testrec_expected_url1)) self.failUnless(try_url_download(testrec_expected_url2)) def test_simple_fft_insert_with_restriction(self): """bibupload - simple FFT insert with restriction""" # define the test case: email_tag = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][0:3] email_ind1 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][3] email_ind2 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][4] email_code = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][5] test_to_upload = """ SzGeCERN Test, John Test University jekyll@cds.cern.ch ARTICLE %(siteurl)s/img/site_logo.gif thesis %(siteurl)s/img/sb.gif """ % {'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code, 'siteurl': CFG_SITE_URL} 
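        # The FFT above attaches two files: the main file carries a
        # restriction (status 'thesis') and the second file is declared
        # as its icon.  The assertions that follow check the expected
        # access rules: the author email stored in the record (in the
        # first CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS tag) grants
        # user 'jekyll' access to the restricted file and its icon, while
        # user 'hyde' gets an authorization failure and anonymous access
        # is answered with the "This file is restricted." page instead of
        # the file content.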
testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University jekyll@cds.cern.ch %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon icon ARTICLE """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University %(email_tag)s%(email_ind1)s%(email_ind2)s $$%(email_code)sjekyll@cds.cern.ch 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon$$xicon 980__ $$aARTICLE """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == ' ' and '_' or email_ind1, 'email_ind2': email_ind2 == ' ' and '_' or email_ind2, 'email_code': email_code} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_icon = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) testrec_expected_icon = testrec_expected_icon.replace('123456789', str(recid)) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self.assertEqual(test_web_page_content(testrec_expected_icon, 'jekyll', 'j123ekyll', expected_text=urlopen('%(siteurl)s/img/sb.gif' % { 'siteurl': CFG_SITE_URL }).read()), []) self.assertEqual(test_web_page_content(testrec_expected_icon, 'hyde', 'h123yde', expected_text='Authorization failure'), []) force_webcoll(recid) self.assertEqual(test_web_page_content(testrec_expected_icon, 'hyde', 'h123yde', expected_text=urlopen('%(siteurl)s/img/restricted.gif' % {'siteurl': CFG_SITE_URL}).read()), []) self.failUnless("HTTP Error 401: Unauthorized" in test_web_page_content(testrec_expected_url, 'hyde', 'h123yde')[0]) self.failUnless("This file is restricted." 
in urlopen(testrec_expected_url).read()) def test_simple_fft_insert_with_icon(self): """bibupload - simple FFT insert with icon""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif %(siteurl)s/img/sb.gif """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon icon """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon$$xicon """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_icon = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) testrec_expected_icon = testrec_expected_icon.replace('123456789', str(recid)) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self.failUnless(try_url_download(testrec_expected_url)) self.failUnless(try_url_download(testrec_expected_icon)) def test_multiple_fft_insert(self): """bibupload - multiple FFT insert""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif %(siteurl)s/img/head.gif %(siteurl)s/%(CFG_SITE_RECORD)s/95/files/9809057.pdf %(prefix)s/var/tmp/demobibdata.xml """ % { 'prefix': CFG_PREFIX, 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/9809057.pdf %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/demobibdata.xml %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/9809057.pdf 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/demobibdata.xml 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: testrec_expected_urls = [] for files in ('site_logo.gif', 'head.gif', '9809057.pdf', 'demobibdata.xml'): testrec_expected_urls.append('%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/%(files)s' % 
{'siteurl' : CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'files' : files}) recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_urls = [] for files in ('site_logo.gif', 'head.gif', '9809057.pdf', 'demobibdata.xml'): testrec_expected_urls.append('%(siteurl)s/%(CFG_SITE_RECORD)s/%(recid)s/files/%(files)s' % {'siteurl' : CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'files' : files, 'recid' : recid}) # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') for url in testrec_expected_urls: self.failUnless(try_url_download(url)) self._test_bibdoc_status(recid, 'head', '') self._test_bibdoc_status(recid, '9809057', '') self._test_bibdoc_status(recid, 'site_logo', '') self._test_bibdoc_status(recid, 'demobibdata', '') def test_simple_fft_correct(self): """bibupload - simple FFT correct""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } test_to_correct = """ 123456789 %(siteurl)s/img/sb.gif site_logo """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'site_logo', '') def test_fft_correct_already_exists(self): """bibupload - FFT correct with already identical existing file""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif a description %(siteurl)s/img/help.png site_logo another description %(siteurl)s/img/rss.png %(siteurl)s/img/line.gif %(siteurl)s/img/merge.png line """ % { 'siteurl': CFG_SITE_URL } test_to_correct 
= """ 123456789 %(siteurl)s/img/site_logo.gif a second description %(siteurl)s/img/help.png site_logo another second description %(siteurl)s/img/refresh.png rss %(siteurl)s/img/line.gif %(siteurl)s/img/merge-small.png line """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/line.gif %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/line.png %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/rss.png %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif a second description %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.png another second description """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/line.gif 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/line.png 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/rss.png 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif$$ya second description 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.png$$yanother second description """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url2 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/rss.png" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url3 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.png" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url4 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/line.png" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url5 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/line.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) testrec_expected_url2 = testrec_expected_url2.replace('123456789', str(recid)) testrec_expected_url3 = testrec_expected_url3.replace('123456789', str(recid)) testrec_expected_url4 = testrec_expected_url4.replace('123456789', str(recid)) testrec_expected_url5 = testrec_expected_url5.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.failUnless(try_url_download(testrec_expected_url2)) self.failUnless(try_url_download(testrec_expected_url3)) self.failUnless(try_url_download(testrec_expected_url4)) self.failUnless(try_url_download(testrec_expected_url5)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') bibrecdocs = 
BibRecDocs(recid) self.failUnless(bibrecdocs.get_bibdoc('rss').list_versions(), [1, 2]) self.failUnless(bibrecdocs.get_bibdoc('site_logo').list_versions(), [1]) self.failUnless(bibrecdocs.get_bibdoc('line').list_versions(), [1, 2]) def test_fft_append_already_exists(self): """bibupload - FFT append with already identical existing file""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif a description """ % { 'siteurl': CFG_SITE_URL } test_to_append = """ 123456789 %(siteurl)s/img/site_logo.gif a second description %(siteurl)s/img/help.png site_logo another second description """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif a description %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.png another second description """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif$$ya description 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.png$$yanother second description """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url2 = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.png" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_append = test_to_append.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_append) bibupload.bibupload(recs[0], opt_mode='append') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.failUnless(try_url_download(testrec_expected_url2)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') bibrecdocs = BibRecDocs(recid) def test_fft_implicit_fix_marc(self): """bibupload - FFT implicit FIX-MARC""" test_to_upload = """ SzGeCERN Test, John Test University foo@bar.com %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } test_to_correct = """ 123456789 foo@bar.com %(siteurl)s/img/site_logo.gif %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University foo@bar.com %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8560_ $$ffoo@bar.com 8564_ $$u%(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # 
replace test buffers with real recid of inserted test record: test_to_correct = test_to_correct.replace('123456789', str(recid)) testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) # correct test record with implicit FIX-MARC: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') def test_fft_vs_bibedit(self): """bibupload - FFT Vs. BibEdit compatibility""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } test_to_replace = """ 123456789 SzGeCERN Test, John Test University http://www.google.com/ BibEdit Comment %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif BibEdit Description 01 http://cern.ch/ """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_xm = str(test_to_replace) testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$uhttp://www.google.com/ 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif$$x01$$yBibEdit Description$$zBibEdit Comment 8564_ $$uhttp://cern.ch/ """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_replace = test_to_replace.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_replace) bibupload.bibupload(recs[0], opt_mode='replace') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'site_logo', '') bibrecdocs = BibRecDocs(recid) bibdoc = bibrecdocs.get_bibdoc('site_logo') self.assertEqual(bibdoc.get_description('.gif'), 'BibEdit Description') def test_detailed_fft_correct(self): """bibupload - detailed FFT correct""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif Try Comment """ % { 'siteurl': CFG_SITE_URL } test_to_correct = """ 123456789 %(siteurl)s/img/head.gif site_logo patata Next Try KEEP-OLD-VALUE """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif Next Try Comment """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ 
$$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif$$yNext Try$$zComment """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'patata', '') def test_no_url_fft_correct(self): """bibupload - no_url FFT correct""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif Try Comment """ % { 'siteurl': CFG_SITE_URL } test_to_correct = """ 123456789 site_logo patata .gif KEEP-OLD-VALUE Next Comment """ testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif Try Next Comment """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif$$yTry$$zNext Comment """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'patata', '') def test_new_icon_fft_append(self): """bibupload - new icon FFT append""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University """ test_to_correct = """ 123456789 site_logo %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test 
University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon icon """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon$$xicon """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif?subformat=icon" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='append') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'site_logo', '') def test_multiple_fft_correct(self): """bibupload - multiple FFT correct""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif Try Comment Restricted %(siteurl)s/img/okay.gif site_logo .jpeg Try jpeg Comment jpeg Restricted """ % { 'siteurl': CFG_SITE_URL } test_to_correct = """ 123456789 %(siteurl)s/img/loading.gif site_logo patata .gif New restricted """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/patata.gif" \ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless("This file is restricted." 
in urlopen(testrec_expected_url).read()) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'patata', 'New restricted') def test_purge_fft_correct(self): """bibupload - purge FFT correct""" # define the test case: test_to_upload = """ SzGeCERN Test, John Test University %(siteurl)s/img/site_logo.gif %(siteurl)s/img/head.gif """ % { 'siteurl': CFG_SITE_URL } test_to_correct = """ 123456789 %(siteurl)s/img/site_logo.gif """ % { 'siteurl': CFG_SITE_URL } test_to_purge = """ 123456789 %(siteurl)s/img/site_logo.gif PURGE """ % { 'siteurl': CFG_SITE_URL } testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) test_to_purge = test_to_purge.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # purge test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_purge) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'site_logo', '') self._test_bibdoc_status(recid, 'head', '') def test_revert_fft_correct(self): """bibupload - revert FFT correct""" # define the test case: email_tag = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][0:3] email_ind1 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][3] email_ind2 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][4] email_code = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][5] test_to_upload = """ SzGeCERN Test, John Test University jekyll@cds.cern.ch %(siteurl)s/img/iconpen.gif site_logo """ % { 'siteurl': CFG_SITE_URL, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} test_to_correct = """ 123456789 %s/img/head.gif site_logo """ % CFG_SITE_URL test_to_revert = """ 123456789 site_logo REVERT 1 """ testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University jekyll@cds.cern.ch 
%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University %(email_tag)s%(email_ind1)s%(email_ind2)s $$%(email_code)sjekyll@cds.cern.ch 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif """ % {'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == ' ' and '_' or email_ind1, 'email_ind2': email_ind2 == ' ' and '_' or email_ind2, 'email_code': email_code} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/site_logo.gif" % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_correct = test_to_correct.replace('123456789', str(recid)) test_to_revert = test_to_revert.replace('123456789', str(recid)) # correct test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_correct) bibupload.bibupload(recs[0], opt_mode='correct') # revert test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_revert) bibupload.bibupload(recs[0], opt_mode='correct') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') self._test_bibdoc_status(recid, 'site_logo', '') expected_content_version1 = urlopen('%s/img/iconpen.gif' % CFG_SITE_URL).read() expected_content_version2 = urlopen('%s/img/head.gif' % CFG_SITE_URL).read() expected_content_version3 = expected_content_version1 self.assertEqual(test_web_page_content('%s/%s/%s/files/site_logo.gif?version=1' % (CFG_SITE_URL, CFG_SITE_RECORD, recid), 'jekyll', 'j123ekyll', expected_content_version1), []) self.assertEqual(test_web_page_content('%s/%s/%s/files/site_logo.gif?version=2' % (CFG_SITE_URL, CFG_SITE_RECORD, recid), 'jekyll', 'j123ekyll', expected_content_version2), []) self.assertEqual(test_web_page_content('%s/%s/%s/files/site_logo.gif?version=3' % (CFG_SITE_URL, CFG_SITE_RECORD, recid), 'jekyll', 'j123ekyll', expected_content_version3), []) def test_simple_fft_replace(self): """bibupload - simple FFT replace""" # define the test case: email_tag = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][0:3] email_ind1 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][3] email_ind2 = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][4] email_code = CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS[0][5] test_to_upload = """ SzGeCERN Test, John Test University jekyll@cds.cern.ch %(siteurl)s/img/iconpen.gif site_logo """ % {'siteurl': CFG_SITE_URL, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} test_to_replace = """ 123456789 
SzGeCERN Test, John Test University jekyll@cds.cern.ch %(siteurl)s/img/head.gif """ % {'siteurl': CFG_SITE_URL, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} testrec_expected_xm = """ 123456789 SzGeCERN Test, John Test University jekyll@cds.cern.ch %(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == '_' and ' ' or email_ind1, 'email_ind2': email_ind2 == '_' and ' ' or email_ind2, 'email_code': email_code} testrec_expected_hm = """ 001__ 123456789 003__ SzGeCERN 100__ $$aTest, John$$uTest University %(email_tag)s%(email_ind1)s%(email_ind2)s $$%(email_code)sjekyll@cds.cern.ch 8564_ $$u%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif """ % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'email_tag': email_tag, 'email_ind1': email_ind1 == ' ' and '_' or email_ind1, 'email_ind2': email_ind2 == ' ' and '_' or email_ind2, 'email_code': email_code} testrec_expected_url = "%(siteurl)s/%(CFG_SITE_RECORD)s/123456789/files/head.gif" % { 'siteurl': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD} # insert test record: recs = bibupload.xml_marc_to_records(test_to_upload) err, recid, msg = bibupload.bibupload(recs[0], opt_mode='insert') # replace test buffers with real recid of inserted test record: testrec_expected_xm = testrec_expected_xm.replace('123456789', str(recid)) testrec_expected_hm = testrec_expected_hm.replace('123456789', str(recid)) testrec_expected_url = testrec_expected_url.replace('123456789', str(recid)) test_to_replace = test_to_replace.replace('123456789', str(recid)) # replace test record with new FFT: recs = bibupload.xml_marc_to_records(test_to_replace) bibupload.bibupload(recs[0], opt_mode='replace') # compare expected results: inserted_xm = print_record(recid, 'xm') inserted_hm = print_record(recid, 'hm') self.failUnless(try_url_download(testrec_expected_url)) self.assertEqual(compare_xmbuffers(inserted_xm, testrec_expected_xm), '') self.assertEqual(compare_hmbuffers(inserted_hm, testrec_expected_hm), '') expected_content_version = urlopen('%s/img/head.gif' % CFG_SITE_URL).read() self.assertEqual(test_web_page_content(testrec_expected_url, 'hyde', 'h123yde', expected_text='Authorization failure'), []) self.assertEqual(test_web_page_content(testrec_expected_url, 'jekyll', 'j123ekyll', expected_text=expected_content_version), []) TEST_SUITE = make_test_suite(BibUploadHoldingPenTest, BibUploadInsertModeTest, BibUploadAppendModeTest, BibUploadCorrectModeTest, BibUploadDeleteModeTest, BibUploadReplaceModeTest, BibUploadReferencesModeTest, BibUploadRecordsWithSYSNOTest, BibUploadRecordsWithEXTOAIIDTest, BibUploadRecordsWithOAIIDTest, BibUploadFMTModeTest, BibUploadIndicatorsTest, BibUploadUpperLowerCaseTest, BibUploadControlledProvenanceTest, BibUploadStrongTagsTest, BibUploadFFTModeTest, BibUploadPretendTest, BibUploadCallbackURLTest ) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/miscutil/lib/Makefile.am b/modules/miscutil/lib/Makefile.am index 47ca2dccb..a75b89080 100644 --- a/modules/miscutil/lib/Makefile.am +++ b/modules/miscutil/lib/Makefile.am @@ -1,103 +1,105 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir = $(libdir)/python/invenio pylib_DATA = __init__.py \ errorlib.py \ errorlib_tests.py \ errorlib_webinterface.py \ errorlib_regression_tests.py \ data_cacher.py \ dbdump.py \ + web_api_key.py \ + web_api_key_tests.py \ dbquery.py \ dbquery_tests.py \ dbquery_regression_tests.py \ logicutils.py \ logicutils_tests.py \ mailutils.py \ miscutil_config.py \ messages.py \ messages_tests.py \ textutils.py \ textutils_tests.py \ dateutils.py \ dateutils_tests.py \ htmlutils.py \ htmlutils_tests.py \ testutils.py \ testutils_regression_tests.py \ invenio_connector.py \ invenio_connector_regression_tests.py \ urlutils.py \ urlutils_tests.py \ w3c_validator.py \ intbitset_tests.py \ intbitset_helper.py \ inveniocfg.py \ shellutils.py \ shellutils_tests.py \ pluginutils.py \ pluginutils_tests.py \ plotextractor.py \ plotextractor_converter.py \ plotextractor_getter.py \ plotextractor_output_utils.py \ plotextractor_tests.py \ plotextractor_regression_tests.py \ plotextractor_config.py \ solrutils.py \ remote_debugger.py \ remote_debugger_config.py \ remote_debugger_wsgi_reload.py \ jsonutils.py \ - sequtils_cnum.py \ - sequtils.py \ - sequtils_regression_tests.py + sequtils_cnum.py \ + sequtils.py \ + sequtils_regression_tests.py noinst_DATA = testimport.py \ kwalitee.py \ pep8.py tmpdir = $(prefix)/var/tmp tmp_DATA = intbitset_example.int EXTRA_DIST = $(pylib_DATA) \ testimport.py \ kwalitee.py \ pep8.py \ intbitset.pyx \ intbitset.c \ intbitset.h \ intbitset_impl.c \ intbitset_setup.py \ intbitset.pyx \ $(tmp_DATA) all: $(PYTHON) $(srcdir)/intbitset_setup.py build_ext install-data-hook: $(PYTHON) $(srcdir)/testimport.py ${prefix} @find ${srcdir} -name intbitset.so -exec cp {} ${pylibdir} \; CLEANFILES = *~ *.tmp *.pyc clean-local: rm -rf build diff --git a/modules/miscutil/lib/invenio_connector.py b/modules/miscutil/lib/invenio_connector.py index 18babba1e..a673b563f 100644 --- a/modules/miscutil/lib/invenio_connector.py +++ b/modules/miscutil/lib/invenio_connector.py @@ -1,607 +1,610 @@ # -*- coding: utf-8 -*- # ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
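## A minimal sketch of authenticated use of the connector defined below
## (illustrative values; assumes the mechanize package is installed and
## that the target Invenio instance is reachable over HTTPS, since
## InvenioConnector refuses to log in over plain HTTP):
##
##   from invenio.invenio_connector import InvenioConnector
##   site = InvenioConnector("https://your.site.com",
##                           user="jekyll", password="j123ekyll")
##   records = site.search(p="ellis", of="xm")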
""" Tools to connect to distant Invenio servers using Invenio APIs. Example of use: from InvenioConnector import * cds = InvenioConnector("http://cdsweb.cern.ch") results = cds.search("higgs") for record in results: print record["245__a"][0] print record["520__b"][0] for author in record["100__"]: print author["a"][0], author["u"][0] FIXME: - implement cache expiration - exceptions handling - parsing of - better checking of input parameters (especially InvenioConnector.__init__ "url") - improve behaviour when running locally (perform_request_search *requiring* "req" object) """ import urllib import urllib2 import xml.sax import re import tempfile import os import time import sys MECHANIZE_CLIENTFORM_VERSION_CHANGE = (0, 2, 0) try: import mechanize if mechanize.__version__ < MECHANIZE_CLIENTFORM_VERSION_CHANGE: OLD_MECHANIZE_VERSION = True import ClientForm else: OLD_MECHANIZE_VERSION = False MECHANIZE_AVAILABLE = True except ImportError: MECHANIZE_AVAILABLE = False try: # if we are running locally, we can optimize :-) from invenio.config import CFG_SITE_URL, CFG_SITE_SECURE_URL, CFG_SITE_RECORD, CFG_CERN_SITE from invenio.bibtask import task_low_level_submission from invenio.search_engine import perform_request_search, collection_restricted_p from invenio.bibformat import format_records + from invenio.urlutils import make_user_agent_string LOCAL_SITE_URLS = [CFG_SITE_URL, CFG_SITE_SECURE_URL] + CFG_USER_AGENT = make_user_agent_string("invenio_connector") except ImportError: LOCAL_SITE_URLS = None CFG_CERN_SITE = 0 + CFG_USER_AGENT = "invenio_connector" CFG_CDS_URL = "http://cdsweb.cern.ch/" class InvenioConnectorAuthError(Exception): """ This exception is called by InvenioConnector when authentication fails during remote or local connections. """ def __init__(self, value): """ Set the internal "value" attribute to that of the passed "value" parameter. @param value: an error string to display to the user. @type value: string """ Exception.__init__(self) self.value = value def __str__(self): """ Return oneself as a string (actually, return the contents of self.value). @return: representation of error @rtype: string """ return str(self.value) class InvenioConnectorServerError(Exception): """ This exception is called by InvenioConnector when using it on a machine with no Invenio installed and no remote server URL is given during instantiation. """ def __init__(self, value): """ Set the internal "value" attribute to that of the passed "value" parameter. @param value: an error string to display to the user. @type value: string """ Exception.__init__(self) self.value = value def __str__(self): """ Return oneself as a string (actually, return the contents of self.value). @return: representation of error @rtype: string """ return str(self.value) class InvenioConnector(object): """ Creates an connector to a server running Invenio """ def __init__(self, url=None, user="", password="", login_method="Local", local_import_path="invenio"): """ Initialize a new instance of the server at given URL. If the server happens to be running on the local machine, the access will be done directly using the Python APIs. In that case you can choose from which base path to import the necessary file specifying the local_import_path parameter. @param url: the url to which this instance will be connected. Defaults to CFG_SITE_URL, if available. @type url: string @param user: the optional username for interacting with the Invenio instance in an authenticated way. 
@type user: string @param password: the corresponding password. @type password: string @param login_method: the name of the login method the Invenio instance is expecting for this user (in case there is more than one). @type login_method: string @param local_import_path: the base path from which the connector should try to load the local connector, if available. Eg "invenio" will lead to "import invenio.dbquery" @type local_import_path: string @raise InvenioConnectorAuthError: if no secure URL is given for authentication @raise InvenioConnectorServerError: if no URL is given on a machine without Invenio installed """ if url == None and LOCAL_SITE_URLS != None: self.server_url = LOCAL_SITE_URLS[0] # Default to CFG_SITE_URL elif url == None: raise InvenioConnectorServerError("You do not seem to have Invenio installed and no remote URL is given") else: self.server_url = url self.local = LOCAL_SITE_URLS and self.server_url in LOCAL_SITE_URLS self.cached_queries = {} self.cached_records = {} self.cached_baskets = {} self.user = user self.password = password self.login_method = login_method self.browser = None if self.user: if not self.server_url.startswith('https://'): raise InvenioConnectorAuthError("You have to use a secure URL (HTTPS) to login") if MECHANIZE_AVAILABLE: self._init_browser() self._check_credentials() else: self.user = None raise InvenioConnectorAuthError("The Python module Mechanize (and ClientForm" \ " if Mechanize version < 0.2.0) must" \ " be installed to perform authenticated requests.") def _init_browser(self): """ Ovveride this method with the appropriate way to prepare a logged in browser. """ self.browser = mechanize.Browser() self.browser.set_handle_robots(False) self.browser.open(self.server_url + "/youraccount/login") self.browser.select_form(nr=0) self.browser['p_un'] = self.user self.browser['p_pw'] = self.password # Set login_method to be writable self.browser.form.find_control('login_method').readonly = False self.browser['login_method'] = self.login_method self.browser.submit() def _check_credentials(self): if not 'youraccount/logout' in self.browser.response().read(): raise InvenioConnectorAuthError("It was not possible to successfully login with the provided credentials") def search(self, read_cache=True, **kwparams): """ Returns records corresponding to the given search query. See docstring of invenio.search_engine.perform_request_search() for an overview of available parameters. @raise InvenioConnectorAuthError: if authentication fails """ parse_results = False of = kwparams.get('of', "") if of == "": parse_results = True of = "xm" kwparams['of'] = of params = urllib.urlencode(kwparams, doseq=1) # Are we running locally? If so, better directly access the # search engine directly if self.local and of != 't': # See if user tries to search any restricted collection c = kwparams.get('c', "") if c != "": if type(c) is list: colls = c else: colls = [c] for collection in colls: if collection_restricted_p(collection): if self.user: self._check_credentials() continue raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n") kwparams['of'] = 'id' results = perform_request_search(**kwparams) if of.lower() != 'id': results = format_records(results, of) else: if not self.cached_queries.has_key(params + str(parse_results)) or not read_cache: if self.user: results = self.browser.open(self.server_url + "/search?" + params) else: results = urllib2.urlopen(self.server_url + "/search?" 
+ params) if 'youraccount/login' in results.geturl(): # Current user not able to search collection raise InvenioConnectorAuthError("You are trying to search a restricted collection. Please authenticate yourself.\n") else: return self.cached_queries[params + str(parse_results)] if parse_results: # FIXME: we should not try to parse if results is string parsed_records = self._parse_results(results, self.cached_records) self.cached_queries[params + str(parse_results)] = parsed_records return parsed_records else: # pylint: disable=E1103 # The whole point of the following code is to make sure we can # handle two types of variable. try: res = results.read() except AttributeError: res = results # pylint: enable=E1103 if of == "id": try: if type(res) is str: # Transform to list res = [int(recid.strip()) for recid in \ res.strip("[]").split(",") if recid.strip() != ""] res.reverse() except (ValueError, AttributeError): res = [] self.cached_queries[params + str(parse_results)] = res return self.cached_queries[params + str(parse_results)] def search_with_retry(self, sleeptime=3.0, retrycount=3, **params): """ This function performs a search given a dictionary of search(..) parameters. It accounts for server timeouts as necessary and will retry some number of times. @param sleeptime: number of seconds to sleep between retries @type sleeptime: float @param retrycount: number of times to retry given search @type retrycount: int @param params: search parameters @type params: **kwds @rtype: list @return: returns records in given format """ results = [] count = 0 while count < retrycount: try: results = self.search(**params) break except urllib2.URLError: sys.stderr.write("Timeout while searching...Retrying\n") time.sleep(sleeptime) count += 1 else: sys.stderr.write("Aborting search after %d attempts.\n" % (retrycount,)) return results def search_similar_records(self, recid): """ Returns the records similar to the given one """ return self.search(p="recid:" + str(recid), rm="wrd") def search_records_cited_by(self, recid): """ Returns records cited by the given one """ return self.search(p="recid:" + str(recid), rm="citation") def get_records_from_basket(self, bskid, group_basket=False, read_cache=True): """ Returns the records from the (public) basket with given bskid """ if not self.cached_baskets.has_key(bskid) or not read_cache: if self.user: if group_basket: group_basket = '&category=G' else: group_basket = '' results = self.browser.open(self.server_url + \ "/yourbaskets/display?of=xm&bskid=" + str(bskid) + group_basket) else: results = urllib2.urlopen(self.server_url + \ "/yourbaskets/display_public?of=xm&bskid=" + str(bskid)) else: return self.cached_baskets[bskid] parsed_records = self._parse_results(results, self.cached_records) self.cached_baskets[bskid] = parsed_records return parsed_records def get_record(self, recid, read_cache=True): """ Returns the record with given recid """ if self.cached_records.has_key(recid) or not read_cache: return self.cached_records[recid] else: return self.search(p="recid:" + str(recid)) def upload_marcxml(self, marcxml, mode): """ Uploads a record to the server Parameters: marcxml - *str* the XML to upload. mode - *str* the mode to use for the upload. "-i" insert new records "-r" replace existing records "-c" correct fields of records "-a" append fields to records "-ir" insert record or replace if it exists """ if mode not in ["-i", "-r", "-c", "-a", "-ir"]: raise NameError, "Incorrect mode " + str(mode) # Are we running locally? 
# If so, submit directly
        if self.local:
            (code, marcxml_filepath) = tempfile.mkstemp(prefix="upload_%s" % \
                time.strftime("%Y%m%d_%H%M%S_", time.localtime()))
            marcxml_file_d = os.fdopen(code, "w")
            marcxml_file_d.write(marcxml)
            marcxml_file_d.close()
            return task_low_level_submission("bibupload", "", mode, marcxml_filepath)
        else:
            params = urllib.urlencode({'file': marcxml, 'mode': mode})
            ## We don't use self.browser as batchuploader is protected by IP
            opener = urllib2.build_opener()
-            opener.addheaders = [('User-agent', 'invenio_webupload')]
+            opener.addheaders = [('User-Agent', CFG_USER_AGENT)]
            return opener.open(self.server_url + "/batchuploader/robotupload", params,)

    def _parse_results(self, results, cached_records):
        """
        Parses the given results (in MARCXML format).

        The given "cached_records" list is a pool of already existing
        parsed records (in order to avoid keeping several times the same
        records in memory)
        """
        parser = xml.sax.make_parser()
        handler = RecordsHandler(cached_records)
        parser.setContentHandler(handler)
        parser.parse(results)
        return handler.records

class Record(dict):
    """
    Represents an Invenio record
    """
    def __init__(self, recid=None, marcxml=None, server_url=None):
        #dict.__init__(self)
        self.recid = recid
        self.marcxml = ""
        if marcxml is not None:
            self.marcxml = marcxml
        #self.record = {}
        self.server_url = server_url

    def __setitem__(self, item, value):
        tag, ind1, ind2, subcode = decompose_code(item)
        if subcode is not None:
            #if not dict.has_key(self, tag + ind1 + ind2):
            #    dict.__setitem__(self, tag + ind1 + ind2, [])
            dict.__setitem__(self, tag + ind1 + ind2, [{subcode: [value]}])
        else:
            dict.__setitem__(self, tag + ind1 + ind2, value)

    def __getitem__(self, item):
        tag, ind1, ind2, subcode = decompose_code(item)
        datafields = dict.__getitem__(self, tag + ind1 + ind2)
        if subcode is not None:
            subfields = []
            for datafield in datafields:
                if datafield.has_key(subcode):
                    subfields.extend(datafield[subcode])
            return subfields
        else:
            return datafields

    def __contains__(self, item):
        return dict.__contains__(self, item)

    def __repr__(self):
        return "Record(" + dict.__repr__(self) + ")"

    def __str__(self):
        return self.marcxml

    def export(self, of="marcxml"):
        """
        Returns the record in chosen format
        """
        return self.marcxml

    def url(self):
        """
        Returns the URL to this record.
        Returns None if not known
        """
        if self.server_url is not None and \
           self.recid is not None:
            return self.server_url + "/" + CFG_SITE_RECORD + "/" + str(self.recid)
        else:
            return None

class _SGMLParserFactory(mechanize.DefaultFactory):
    """
    Black magic to be able to interact with CERN SSO forms.
    """
    def __init__(self, i_want_broken_xhtml_support=False):
        if OLD_MECHANIZE_VERSION:
            forms_factory = mechanize.FormsFactory(
                form_parser_class=ClientForm.XHTMLCompatibleFormParser)
        else:
            forms_factory = mechanize.FormsFactory(
                form_parser_class=mechanize.XHTMLCompatibleFormParser)
        mechanize.Factory.__init__(
            self,
            forms_factory=forms_factory,
            links_factory=mechanize.LinksFactory(),
            title_factory=mechanize.TitleFactory(),
            response_type_finder=mechanize._html.ResponseTypeFinder(
                allow_xhtml=i_want_broken_xhtml_support),
            )

class CDSInvenioConnector(InvenioConnector):
    def __init__(self, user="", password="", local_import_path="invenio"):
        """
        This is a specialized InvenioConnector class suitable to connect
        to the CERN Document Server (CDS), which uses centralized SSO.
""" super(CDSInvenioConnector, self).__init__(CFG_CDS_URL, user, password, local_import_path=local_import_path) def _init_browser(self): """ @note: update this everytime the CERN SSO login form is refactored. """ self.browser = mechanize.Browser(factory=_SGMLParserFactory(i_want_broken_xhtml_support=True)) self.browser.set_handle_robots(False) self.browser.open(self.server_url) self.browser.follow_link(text_regex="login") self.browser.select_form(nr=0) self.browser.form['ctl00$ContentPlaceHolder1$txtFormsLogin'] = self.user self.browser.form['ctl00$ContentPlaceHolder1$txtFormsPassword'] = self.password self.browser.submit() self.browser.select_form(nr=0) self.browser.submit() class RecordsHandler(xml.sax.handler.ContentHandler): "MARCXML Parser" def __init__(self, records): """ Parameters: records - *dict* a dictionary with an already existing cache of records """ self.cached_records = records self.records = [] self.in_record = False self.in_controlfield = False self.in_datafield = False self.in_subfield = False self.cur_tag = None self.cur_subfield = None self.cur_controlfield = None self.cur_datafield = None self.cur_record = None self.recid = 0 self.buffer = "" self.counts = 0 def startElement(self, name, attributes): if name == "record": self.cur_record = Record() self.in_record = True elif name == "controlfield": tag = attributes["tag"] self.cur_datafield = "" self.cur_tag = tag self.cur_controlfield = [] if not self.cur_record.has_key(tag): self.cur_record[tag] = self.cur_controlfield self.in_controlfield = True elif name == "datafield": tag = attributes["tag"] self.cur_tag = tag ind1 = attributes["ind1"] if ind1 == " ": ind1 = "_" ind2 = attributes["ind2"] if ind2 == " ": ind2 = "_" if not self.cur_record.has_key(tag + ind1 + ind2): self.cur_record[tag + ind1 + ind2] = [] self.cur_datafield = {} self.cur_record[tag + ind1 + ind2].append(self.cur_datafield) self.in_datafield = True elif name == "subfield": subcode = attributes["code"] if not self.cur_datafield.has_key(subcode): self.cur_subfield = [] self.cur_datafield[subcode] = self.cur_subfield else: self.cur_subfield = self.cur_datafield[subcode] self.in_subfield = True def characters(self, data): if self.in_subfield: self.buffer += data elif self.in_controlfield: self.buffer += data elif "Search-Engine-Total-Number-Of-Results:" in data: print data match_obj = re.search("\d+", data) if match_obj: print int(match_obj.group()) self.counts = int(match_obj.group()) def endElement(self, name): if name == "record": self.in_record = False elif name == "controlfield": if self.cur_tag == "001": self.recid = int(self.buffer) if self.cached_records.has_key(self.recid): # Record has already been parsed, no need to add pass else: # Add record to the global cache self.cached_records[self.recid] = self.cur_record # Add record to the ordered list of results self.records.append(self.cached_records[self.recid]) self.cur_controlfield.append(self.buffer) self.in_controlfield = False self.buffer = "" elif name == "datafield": self.in_datafield = False elif name == "subfield": self.in_subfield = False self.cur_subfield.append(self.buffer) self.buffer = "" def decompose_code(code): """ Decomposes a MARC "code" into tag, ind1, ind2, subcode """ code = "%-6s" % code ind1 = code[3:4] if ind1 == " ": ind1 = "_" ind2 = code[4:5] if ind2 == " ": ind2 = "_" subcode = code[5:6] if subcode == " ": subcode = None return (code[0:3], ind1, ind2, subcode) diff --git a/modules/miscutil/lib/inveniocfg.py b/modules/miscutil/lib/inveniocfg.py index f310a9f7b..1d355a7e2 
100644 --- a/modules/miscutil/lib/inveniocfg.py +++ b/modules/miscutil/lib/inveniocfg.py @@ -1,1350 +1,1361 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio configuration and administration CLI tool. Usage: inveniocfg [options] General options: -h, --help print this help -V, --version print version number Options to finish your installation: --create-apache-conf create Apache configuration files --create-tables create DB tables for Invenio --load-webstat-conf load the WebStat configuration --drop-tables drop DB tables of Invenio --check-openoffice check for correctly set up of openoffice temporary directory Options to set up and test a demo site: --create-demo-site create demo site --load-demo-records load demo records --remove-demo-records remove demo records, keeping demo site --drop-demo-site drop demo site configurations too --run-unit-tests run unit test suite (needs demo site) --run-regression-tests run regression test suite (needs demo site) --run-web-tests run web tests in a browser (needs demo site, Firefox, Selenium IDE) Options to update config files in situ: --update-all perform all the update options --update-config-py update config.py file from invenio.conf file --update-dbquery-py update dbquery.py with DB credentials from invenio.conf --update-dbexec update dbexec with DB credentials from invenio.conf --update-bibconvert-tpl update bibconvert templates with CFG_SITE_URL from invenio.conf --update-web-tests update web test cases with CFG_SITE_URL from invenio.conf Options to update DB tables: --reset-all perform all the reset options --reset-sitename reset tables to take account of new CFG_SITE_NAME* --reset-siteadminemail reset tables to take account of new CFG_SITE_ADMIN_EMAIL --reset-fieldnames reset tables to take account of new I18N names from PO files --reset-recstruct-cache reset record structure cache according to CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE Options to help the work: --list print names and values of all options from conf files --get get value of a given option from conf files --conf-dir path to directory where invenio*.conf files are [optional] --detect-system-details print system details such as Apache/Python/MySQL versions """ __revision__ = "$Id$" from ConfigParser import ConfigParser import os import re import shutil import socket import sys def print_usage(): """Print help.""" print __doc__ def print_version(): """Print version information.""" print __revision__ def convert_conf_option(option_name, option_value): """ Convert conf option into Python config.py line, converting values to ints or strings as appropriate. 
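
    For example (illustrative values; CFG_EXAMPLE_TIMEOUT is a
    hypothetical option name):

        convert_conf_option('CFG_SITE_URL', 'http://localhost')
        => 'CFG_SITE_URL = "http://localhost"'

        convert_conf_option('CFG_EXAMPLE_TIMEOUT', '30')
        => 'CFG_EXAMPLE_TIMEOUT = 30'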
""" ## 1) convert option name to uppercase: option_name = option_name.upper() ## 2) convert option value to int or string: if option_name in ['CFG_BIBUPLOAD_REFERENCE_TAG', 'CFG_BIBUPLOAD_EXTERNAL_SYSNO_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG', 'CFG_BIBUPLOAD_EXTERNAL_OAIID_PROVENANCE_TAG', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS',]: # some options are supposed be string even when they look like # numeric option_value = '"' + option_value + '"' else: try: option_value = int(option_value) except ValueError: option_value = '"' + option_value + '"' ## 3a) special cases: chars regexps if option_name in ['CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS', 'CFG_BIBINDEX_CHARS_PUNCTUATION']: option_value = 'r"[' + option_value[1:-1] + ']"' ## 3abis) special cases: real regexps - if option_name in ['CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES']: + if option_name in ['CFG_BIBINDEX_PERFORM_OCR_ON_DOCNAMES', + 'CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS']: option_value = 'r"' + option_value[1:-1] + '"' ## 3b) special cases: True, False, None if option_value in ['"True"', '"False"', '"None"']: option_value = option_value[1:-1] - ## 3c) special cases: dicts + ## 3c) special cases: dicts and real pythonic lists if option_name in ['CFG_WEBSEARCH_FIELDS_CONVERT', 'CFG_BATCHUPLOADER_WEB_ROBOT_RIGHTS', 'CFG_SITE_EMERGENCY_EMAIL_ADDRESSES', 'CFG_BIBMATCH_FUZZY_WORDLIMITS', 'CFG_BIBMATCH_QUERY_TEMPLATES', 'CFG_WEBSEARCH_SYNONYM_KBRS', 'CFG_BIBINDEX_SYNONYM_KBRS', 'CFG_WEBCOMMENT_EMAIL_REPLIES_TO', 'CFG_WEBCOMMENT_RESTRICTION_DATAFIELD', 'CFG_WEBCOMMENT_ROUND_DATAFIELD', 'CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS', 'CFG_BIBSCHED_NODE_TASKS', 'CFG_BIBEDIT_EXTEND_RECORD_WITH_COLLECTION_TEMPLATE', 'CFG_OAI_METADATA_FORMATS', 'CFG_WEBSUBMIT_DESIRED_CONVERSIONS', - 'CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM',]: + 'CFG_BIBDOCFILE_BEST_FORMATS_TO_EXTRACT_TEXT_FROM', + 'CFG_WEB_API_KEY_ALLOWED_URL', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES', + 'CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS']: option_value = option_value[1:-1] ## 3cbis) very special cases: dicts with backward compatible string if option_name in ['CFG_BIBINDEX_SPLASH_PAGES']: if option_value.startswith('"{') and option_value.endswith('}"'): option_value = option_value[1:-1] else: option_value = """{%s: ".*"}""" % option_value ## 3d) special cases: comma-separated lists if option_name in ['CFG_SITE_LANGS', 'CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS', 'CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS', 'CFG_BIBUPLOAD_STRONG_TAGS', 'CFG_BIBFORMAT_HIDDEN_TAGS', 'CFG_BIBSCHED_GC_TASKS_TO_REMOVE', 'CFG_BIBSCHED_GC_TASKS_TO_ARCHIVE', 'CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS', 'CFG_BIBUPLOAD_CONTROLLED_PROVENANCE_TAGS', 'CFG_BIBUPLOAD_DELETE_FORMATS', 'CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES', 'CFG_WEBSTYLE_HTTP_STATUS_ALERT_LIST', 'CFG_WEBSEARCH_RSS_I18N_COLLECTIONS', 'CFG_BATCHUPLOADER_FILENAME_MATCHING_POLICY', - 'CFG_BATCHUPLOADER_WEB_ROBOT_AGENT', 'CFG_BIBAUTHORID_EXTERNAL_CLAIMED_RECORDS_KEY', 'CFG_BIBCIRCULATION_ITEM_STATUS_OPTIONAL', 'CFG_PLOTEXTRACTOR_DISALLOWED_TEX', 'CFG_OAI_FRIENDS', 'CFG_WEBSTYLE_REVERSE_PROXY_IPS', 'CFG_BIBEDIT_AUTOCOMPLETE_INSTITUTIONS_FIELDS', 'CFG_BIBFORMAT_DISABLE_I18N_FOR_CACHED_FORMATS', 'CFG_BIBFORMAT_HIDDEN_FILE_FORMATS',]: out = "[" for elem in option_value[1:-1].split(","): if elem: if option_name in ['CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES']: # 3d1) integer values out += "%i, " % int(elem) else: # 3d2) string values out += "'%s', " % elem out += "]" option_value = out ## 
3e) special cases: multiline if option_name == 'CFG_OAI_IDENTIFY_DESCRIPTION': # make triple quotes option_value = '""' + option_value + '""' ## 3f) ignore some options: if option_name.startswith('CFG_SITE_NAME_INTL'): # treated elsewhere return ## 3g) special cases: float if option_name in ['CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY', 'CFG_BIBMATCH_LOCAL_SLEEPTIME', 'CFG_BIBMATCH_REMOTE_SLEEPTIME', 'CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT', 'CFG_BIBMATCH_FUZZY_MATCH_VALIDATION_LIMIT']: option_value = float(option_value[1:-1]) ## 3h) special cases: bibmatch validation list if option_name in ['CFG_BIBMATCH_MATCH_VALIDATION_RULESETS']: option_value = option_value[1:-1] - ## 4) finally, return output line: + ## 4a) dropped variables + if option_name in ['CFG_BATCHUPLOADER_WEB_ROBOT_AGENT']: + print >> sys.stderr, ("""ERROR: CFG_BATCHUPLOADER_WEB_ROBOT_AGENT has been dropped in favour of +CFG_BATCHUPLOADER_WEB_ROBOT_AGENTS. +Please, update your invenio-local.conf file accordingly.""") + option_value = option_value[1:-1] + + ## 5) finally, return output line: return '%s = %s' % (option_name, option_value) def cli_cmd_update_config_py(conf): """ Update new config.py from conf options, keeping previous config.py in a backup copy. """ print ">>> Going to update config.py..." ## location where config.py is: configpyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'config.py' ## backup current config.py file: if os.path.exists(configpyfile): shutil.copy(configpyfile, configpyfile + '.OLD') ## here we go: fdesc = open(configpyfile, 'w') ## generate preamble: fdesc.write("# -*- coding: utf-8 -*-\n") fdesc.write("# DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED\n") fdesc.write("# FROM INVENIO.CONF BY EXECUTING:\n") fdesc.write("# " + " ".join(sys.argv) + "\n") ## special treatment for CFG_SITE_NAME_INTL options: fdesc.write("CFG_SITE_NAME_INTL = {}\n") for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): fdesc.write("CFG_SITE_NAME_INTL['%s'] = \"%s\"\n" % (lang, conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang))) ## special treatment for CFG_SITE_SECURE_URL that may be empty, in ## which case it should be put equal to CFG_SITE_URL: if not conf.get("Invenio", "CFG_SITE_SECURE_URL"): conf.set("Invenio", "CFG_SITE_SECURE_URL", conf.get("Invenio", "CFG_SITE_URL")) ## process all the options normally: sections = conf.sections() sections.sort() for section in sections: options = conf.options(section) options.sort() for option in options: if not option.startswith('CFG_DATABASE_'): # put all options except for db credentials into config.py line_out = convert_conf_option(option, conf.get(section, option)) if line_out: fdesc.write(line_out + "\n") ## FIXME: special treatment for experimental variables ## CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES and CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE ## (not offering them in invenio.conf since they will be refactored) fdesc.write("CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE = 0\n") fdesc.write("CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = [0, 1,]\n") ## generate postamble: fdesc.write("") fdesc.write("# END OF GENERATED FILE") ## we are done: fdesc.close() print "You may want to restart Apache now." print ">>> config.py updated successfully." def cli_cmd_update_dbquery_py(conf): """ Update lib/dbquery.py file with DB parameters read from conf file. Note: this edits dbquery.py in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbquery.py..." 
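    ## The in-situ rewrite below looks for credential lines of the form
    ##   CFG_DATABASE_PASS = 'old'
    ## and swaps the quoted value for the one read from the conf file,
    ## e.g. (hypothetical value):
    ##   CFG_DATABASE_PASS = 's3cret'
    ## leaving every other line of dbquery.py untouched.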
## location where dbquery.py is: dbquerypyfile = conf.get("Invenio", "CFG_PYLIBDIR") + \ os.sep + 'invenio' + os.sep + 'dbquery.py' ## backup current dbquery.py file: if os.path.exists(dbquerypyfile): shutil.copy(dbquerypyfile, dbquerypyfile + '.OLD') ## replace db parameters: out = '' for line in open(dbquerypyfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS|SLAVE)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get('Invenio', dbparam)) else: out += line fdesc = open(dbquerypyfile, 'w') fdesc.write(out) fdesc.close() print "You may want to restart Apache now." print ">>> dbquery.py updated successfully." def cli_cmd_update_dbexec(conf): """ Update bin/dbexec file with DB parameters read from conf file. Note: this edits dbexec in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update dbexec..." ## location where dbexec is: dbexecfile = conf.get("Invenio", "CFG_BINDIR") + \ os.sep + 'dbexec' ## backup current dbexec file: if os.path.exists(dbexecfile): shutil.copy(dbexecfile, dbexecfile + '.OLD') ## replace db parameters via sed: out = '' for line in open(dbexecfile, 'r').readlines(): match = re.search(r'^CFG_DATABASE_(HOST|PORT|NAME|USER|PASS|SLAVE)(\s*=\s*)\'.*\'$', line) if match: dbparam = 'CFG_DATABASE_' + match.group(1) out += "%s%s'%s'\n" % (dbparam, match.group(2), conf.get("Invenio", dbparam)) else: out += line fdesc = open(dbexecfile, 'w') fdesc.write(out) fdesc.close() print ">>> dbexec updated successfully." def cli_cmd_update_bibconvert_tpl(conf): """ Update bibconvert/config/*.tpl files looking for 856 http://.../CFG_SITE_RECORD lines, replacing URL with CFG_SITE_URL taken from conf file. Note: this edits tpl files in situ, taking a backup first. Use only when you know what you are doing. """ print ">>> Going to update bibconvert templates..." ## location where bibconvert/config/*.tpl are: tpldir = conf.get("Invenio", 'CFG_ETCDIR') + \ os.sep + 'bibconvert' + os.sep + 'config' ## find all *.tpl files: for tplfilename in os.listdir(tpldir): if tplfilename.endswith(".tpl"): ## change tpl file: tplfile = tpldir + os.sep + tplfilename shutil.copy(tplfile, tplfile + '.OLD') out = '' for line in open(tplfile, 'r').readlines(): match = re.search(r'^(.*)http://.*?/%s/(.*)$' % conf.get("Invenio", 'CFG_SITE_RECORD'), line) if match: out += "%s%s/%s/%s\n" % (match.group(1), conf.get("Invenio", 'CFG_SITE_URL'), conf.get("Invenio", 'CFG_SITE_RECORD'), match.group(2)) else: out += line fdesc = open(tplfile, 'w') fdesc.write(out) fdesc.close() print ">>> bibconvert templates updated successfully." def cli_cmd_update_web_tests(conf): """ Update web test cases lib/webtest/test_*.html looking for http://.+?[>> Going to update web tests..." 
## location where test_*.html files are: testdir = conf.get("Invenio", 'CFG_PREFIX') + os.sep + \ 'lib' + os.sep + 'webtest' + os.sep + 'invenio' ## find all test_*.html files: for testfilename in os.listdir(testdir): if testfilename.startswith("test_") and \ testfilename.endswith(".html"): ## change test file: testfile = testdir + os.sep + testfilename shutil.copy(testfile, testfile + '.OLD') out = '' for line in open(testfile, 'r').readlines(): match = re.search(r'^(.*)http://.+?([)/opt/invenio(.*)$', line) if match: out += "%s%s%s\n" % (match.group(1), conf.get("Invenio", 'CFG_PREFIX'), match.group(2)) else: out += line fdesc = open(testfile, 'w') fdesc.write(out) fdesc.close() print ">>> web tests updated successfully." def cli_cmd_reset_sitename(conf): """ Reset collection-related tables with new CFG_SITE_NAME and CFG_SITE_NAME_INTL* read from conf files. """ print ">>> Going to reset CFG_SITE_NAME and CFG_SITE_NAME_INTL..." from invenio.dbquery import run_sql, IntegrityError # reset CFG_SITE_NAME: sitename = conf.get("Invenio", "CFG_SITE_NAME") try: run_sql("""INSERT INTO collection (id, name, dbquery, reclist) VALUES (1,%s,NULL,NULL)""", (sitename,)) except IntegrityError: run_sql("""UPDATE collection SET name=%s WHERE id=1""", (sitename,)) # reset CFG_SITE_NAME_INTL: for lang in conf.get("Invenio", "CFG_SITE_LANGS").split(","): sitename_lang = conf.get("Invenio", "CFG_SITE_NAME_INTL_" + lang) try: run_sql("""INSERT INTO collectionname (id_collection, ln, type, value) VALUES (%s,%s,%s,%s)""", (1, lang, 'ln', sitename_lang)) except IntegrityError: run_sql("""UPDATE collectionname SET value=%s WHERE ln=%s AND id_collection=1 AND type='ln'""", (sitename_lang, lang)) print "You may want to restart Apache now." print ">>> CFG_SITE_NAME and CFG_SITE_NAME_INTL* reset successfully." def cli_cmd_reset_recstruct_cache(conf): """If CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE is changed, this function will adapt the database to either store or not store the recstruct format.""" from invenio.intbitset import intbitset from invenio.dbquery import run_sql, serialize_via_marshal from invenio.search_engine import get_record from invenio.bibsched import server_pid, pidfile enable_recstruct_cache = conf.get("Invenio", "CFG_BIBUPLOAD_SERIALIZE_RECORD_STRUCTURE") enable_recstruct_cache = enable_recstruct_cache in ('True', '1') pid = server_pid(ping_the_process=False) if pid: print >> sys.stderr, "ERROR: bibsched seems to run with pid %d, according to %s." % (pid, pidfile) print >> sys.stderr, " Please stop bibsched before running this procedure." sys.exit(1) if enable_recstruct_cache: print ">>> Searching records which need recstruct cache resetting; this may take a while..." all_recids = intbitset(run_sql("SELECT id FROM bibrec")) good_recids = intbitset(run_sql("SELECT bibrec.id FROM bibrec JOIN bibfmt ON bibrec.id = bibfmt.id_bibrec WHERE format='recstruct' AND modification_date < last_updated")) recids = all_recids - good_recids print ">>> Generating recstruct cache..." tot = len(recids) count = 0 for recid in recids: value = serialize_via_marshal(get_record(recid)) run_sql("DELETE FROM bibfmt WHERE id_bibrec=%s AND format='recstruct'", (recid, )) run_sql("INSERT INTO bibfmt(id_bibrec, format, last_updated, value) VALUES(%s, 'recstruct', NOW(), %s)", (recid, value)) count += 1 if count % 1000 == 0: print " ... done records %s/%s" % (count, tot) if count % 1000 != 0: print " ... done records %s/%s" % (count, tot) print ">>> recstruct cache generated successfully." 
    else:
        print ">>> Cleaning recstruct cache..."
        run_sql("DELETE FROM bibfmt WHERE format='recstruct'")

def cli_cmd_reset_siteadminemail(conf):
    """
    Reset user-related tables with new CFG_SITE_ADMIN_EMAIL read from conf files.
    """
    print ">>> Going to reset CFG_SITE_ADMIN_EMAIL..."
    from invenio.dbquery import run_sql
    siteadminemail = conf.get("Invenio", "CFG_SITE_ADMIN_EMAIL")
    run_sql("DELETE FROM user WHERE id=1")
    run_sql("""INSERT INTO user (id, email, password, note, nickname) VALUES
                 (1, %s, AES_ENCRYPT(email, ''), 1, 'admin')""",
            (siteadminemail,))
    print "You may want to restart Apache now."
    print ">>> CFG_SITE_ADMIN_EMAIL reset successfully."

def cli_cmd_reset_fieldnames(conf):
    """
    Reset I18N field names such as author, title, etc and other I18N
    ranking method names such as word similarity.  Their translations
    are taken from the PO files.
    """
    print ">>> Going to reset I18N field names..."
    from invenio.messages import gettext_set_language, language_list_long
    from invenio.dbquery import run_sql, IntegrityError

    ## get field id and name list:
    field_id_name_list = run_sql("SELECT id, name FROM field")
    ## get rankmethod id and name list:
    rankmethod_id_name_list = run_sql("SELECT id, name FROM rnkMETHOD")
    ## update names for every language:
    for lang, dummy in language_list_long():
        _ = gettext_set_language(lang)
        ## this list is put here in order for PO system to pick names
        ## suitable for translation
        field_name_names = {"any field": _("any field"),
                            "title": _("title"),
                            "author": _("author"),
                            "abstract": _("abstract"),
                            "keyword": _("keyword"),
                            "report number": _("report number"),
                            "subject": _("subject"),
                            "reference": _("reference"),
                            "fulltext": _("fulltext"),
                            "collection": _("collection"),
                            "division": _("division"),
                            "year": _("year"),
                            "journal": _("journal"),
                            "experiment": _("experiment"),
                            "record ID": _("record ID")}
        ## update I18N names for every language:
        for (field_id, field_name) in field_id_name_list:
            if field_name_names.has_key(field_name):
                try:
                    run_sql("""INSERT INTO fieldname (id_field,ln,type,value) VALUES
                                 (%s,%s,%s,%s)""", (field_id, lang, 'ln',
                                                    field_name_names[field_name]))
                except IntegrityError:
                    run_sql("""UPDATE fieldname SET value=%s
                                WHERE id_field=%s AND ln=%s AND type=%s""",
                            (field_name_names[field_name], field_id, lang, 'ln',))
        ## ditto for rank methods:
        rankmethod_name_names = {"wrd": _("word similarity"),
                                 "demo_jif": _("journal impact factor"),
                                 "citation": _("times cited"),
                                 "citerank_citation_t": _("time-decay cite count"),
                                 "citerank_pagerank_c": _("all-time-best cite rank"),
                                 "citerank_pagerank_t": _("time-decay cite rank"),}
        for (rankmethod_id, rankmethod_name) in rankmethod_id_name_list:
            if rankmethod_name_names.has_key(rankmethod_name):
                try:
                    run_sql("""INSERT INTO rnkMETHODNAME (id_rnkMETHOD,ln,type,value) VALUES
                                 (%s,%s,%s,%s)""", (rankmethod_id, lang, 'ln',
                                                    rankmethod_name_names[rankmethod_name]))
                except IntegrityError:
                    run_sql("""UPDATE rnkMETHODNAME SET value=%s
                                WHERE id_rnkMETHOD=%s AND ln=%s AND type=%s""",
                            (rankmethod_name_names[rankmethod_name], rankmethod_id, lang, 'ln',))
    print ">>> I18N field names reset successfully."

def cli_check_openoffice(conf):
    """
    If OpenOffice.org integration is enabled, checks whether the system is
    properly configured.
""" from invenio.bibtask import check_running_process_user from invenio.websubmit_file_converter import can_unoconv, get_file_converter_logger logger = get_file_converter_logger() for handler in logger.handlers: logger.removeHandler(handler) check_running_process_user() print ">>> Checking if Libre/OpenOffice.org is correctly integrated...", sys.stdout.flush() if can_unoconv(True): print "ok" else: sys.exit(1) def test_db_connection(): """ Test DB connection, and if fails, advise user how to set it up. Useful to be called during table creation. """ print "Testing DB connection...", from invenio.textutils import wrap_text_in_a_box from invenio.dbquery import run_sql, Error ## first, test connection to the DB server: try: run_sql("SHOW TABLES") except Error, err: from invenio.dbquery import CFG_DATABASE_HOST, CFG_DATABASE_PORT, \ CFG_DATABASE_NAME, CFG_DATABASE_USER, CFG_DATABASE_PASS print wrap_text_in_a_box("""\ DATABASE CONNECTIVITY ERROR %(errno)d: %(errmsg)s.\n Perhaps you need to set up database and connection rights? If yes, then please login as MySQL admin user and run the following commands now: $ mysql -h %(dbhost)s -P %(dbport)s -u root -p mysql mysql> CREATE DATABASE %(dbname)s DEFAULT CHARACTER SET utf8; mysql> GRANT ALL PRIVILEGES ON %(dbname)s.* TO %(dbuser)s@%(webhost)s IDENTIFIED BY '%(dbpass)s'; mysql> QUIT The values printed above were detected from your configuration. If they are not right, then please edit your invenio-local.conf file and rerun 'inveniocfg --update-all' first. If the problem is of different nature, then please inspect the above error message and fix the problem before continuing.""" % \ {'errno': err.args[0], 'errmsg': err.args[1], 'dbname': CFG_DATABASE_NAME, 'dbhost': CFG_DATABASE_HOST, 'dbport': CFG_DATABASE_PORT, 'dbuser': CFG_DATABASE_USER, 'dbpass': CFG_DATABASE_PASS, 'webhost': CFG_DATABASE_HOST == 'localhost' and 'localhost' or os.popen('hostname -f', 'r').read().strip(), }) sys.exit(1) print "ok" ## second, test insert/select of a Unicode string to detect ## possible Python/MySQL/MySQLdb mis-setup: print "Testing Python/MySQL/MySQLdb UTF-8 chain...", try: beta_in_utf8 = "β" # Greek beta in UTF-8 is 0xCEB2 run_sql("CREATE TEMPORARY TABLE test__invenio__utf8 (x char(1), y varbinary(2)) DEFAULT CHARACTER SET utf8") run_sql("INSERT INTO test__invenio__utf8 (x, y) VALUES (%s, %s)", (beta_in_utf8, beta_in_utf8)) res = run_sql("SELECT x,y,HEX(x),HEX(y),LENGTH(x),LENGTH(y),CHAR_LENGTH(x),CHAR_LENGTH(y) FROM test__invenio__utf8") assert res[0] == ('\xce\xb2', '\xce\xb2', 'CEB2', 'CEB2', 2L, 2L, 1L, 2L) run_sql("DROP TEMPORARY TABLE test__invenio__utf8") except Exception, err: print wrap_text_in_a_box("""\ DATABASE RELATED ERROR %s\n A problem was detected with the UTF-8 treatment in the chain between the Python application, the MySQLdb connector, and the MySQL database. You may perhaps have installed older versions of some prerequisite packages?\n Please check the INSTALL file and please fix this problem before continuing.""" % err) sys.exit(1) print "ok" def cli_cmd_create_tables(conf): """Create and fill Invenio DB tables. Useful for the installation process.""" print ">>> Going to create and fill tables..." 
    from invenio.config import CFG_PREFIX
    test_db_connection()
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabcreate.sql" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/dbexec < %s/lib/sql/invenio/tabfill.sql" % (CFG_PREFIX, CFG_PREFIX)]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    cli_cmd_reset_sitename(conf)
    cli_cmd_reset_siteadminemail(conf)
    cli_cmd_reset_fieldnames(conf)
    for cmd in ["%s/bin/webaccessadmin -u admin -c -a" % CFG_PREFIX]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Tables created and filled successfully."

def cli_cmd_load_webstat_conf(conf):
    print ">>> Going to load WebStat config..."
    from invenio.config import CFG_PREFIX
    cmd = "%s/bin/webstatadmin --load-config" % CFG_PREFIX
    if os.system(cmd):
        print "ERROR: failed execution of", cmd
        sys.exit(1)
    print ">>> WebStat config loaded successfully."

def cli_cmd_drop_tables(conf):
    """Drop Invenio DB tables.  Useful for the uninstallation process."""
    print ">>> Going to drop tables..."
    from invenio.config import CFG_PREFIX
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    from invenio.webstat import destroy_customevents
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your database tables!"""))
    msg = destroy_customevents()
    if msg:
        print msg
    cmd = "%s/bin/dbexec < %s/lib/sql/invenio/tabdrop.sql" % (CFG_PREFIX, CFG_PREFIX)
    if os.system(cmd):
        print "ERROR: failed execution of", cmd
        sys.exit(1)
    print ">>> Tables dropped successfully."

def cli_cmd_create_demo_site(conf):
    """Create demo site.  Useful for testing purposes."""
    print ">>> Going to create demo site..."
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    run_sql("TRUNCATE schTASK")
    run_sql("TRUNCATE session")
    run_sql("DELETE FROM user WHERE email=''")
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/democfgdata.sql" % \
                   (CFG_PREFIX, CFG_PREFIX),]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    cli_cmd_reset_fieldnames(conf) # needed for I18N demo ranking method names
    for cmd in ["%s/bin/webaccessadmin -u admin -c -r -D" % CFG_PREFIX,
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 1" % CFG_PREFIX,
                "%s/bin/bibsort -u admin --load-config" % CFG_PREFIX,
                "%s/bin/bibsort 2" % CFG_PREFIX, ]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo site created successfully."

def cli_cmd_load_demo_records(conf):
    """Load demo records.  Useful for testing purposes."""
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    print ">>> Going to load demo records..."
    run_sql("TRUNCATE schTASK")
    for cmd in ["%s/bin/bibupload -u admin -i %s/var/tmp/demobibdata.xml" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/bibupload 1" % CFG_PREFIX,
                "%s/bin/bibdocfile --textify --with-ocr --recid 97" % CFG_PREFIX,
                "%s/bin/bibdocfile --textify --all" % CFG_PREFIX,
                "%s/bin/bibindex -u admin" % CFG_PREFIX,
                "%s/bin/bibindex 2" % CFG_PREFIX,
                "%s/bin/bibreformat -u admin -o HB" % CFG_PREFIX,
                "%s/bin/bibreformat 3" % CFG_PREFIX,
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 4" % CFG_PREFIX,
                "%s/bin/bibrank -u admin" % CFG_PREFIX,
                "%s/bin/bibrank 5" % CFG_PREFIX,
                "%s/bin/bibsort -u admin -R" % CFG_PREFIX,
                "%s/bin/bibsort 6" % CFG_PREFIX,
                "%s/bin/oairepositoryupdater -u admin" % CFG_PREFIX,
                "%s/bin/oairepositoryupdater 7" % CFG_PREFIX,
                "%s/bin/bibupload 8" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo records loaded successfully."
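# A note on the command pairs above: each "-u admin" invocation merely
# schedules a bibsched task, and since schTASK was truncated first the
# resulting task ids are deterministic; the bare-number invocations
# ("bibupload 1", "bibindex 2", ...) then run those tasks by id.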
def cli_cmd_remove_demo_records(conf):
    """Remove demo records.  Useful when you are finished testing."""
    print ">>> Going to remove demo records..."
    from invenio.config import CFG_PREFIX
    from invenio.dbquery import run_sql
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your records and documents!"""))
    if os.path.exists(CFG_PREFIX + os.sep + 'var' + os.sep + 'data'):
        shutil.rmtree(CFG_PREFIX + os.sep + 'var' + os.sep + 'data')
    run_sql("TRUNCATE schTASK")
    for cmd in ["%s/bin/dbexec < %s/lib/sql/invenio/tabbibclean.sql" % (CFG_PREFIX, CFG_PREFIX),
                "%s/bin/webcoll -u admin" % CFG_PREFIX,
                "%s/bin/webcoll 1" % CFG_PREFIX,]:
        if os.system(cmd):
            print "ERROR: failed execution of", cmd
            sys.exit(1)
    print ">>> Demo records removed successfully."

def cli_cmd_drop_demo_site(conf):
    """Drop demo site completely.  Useful when you are finished testing."""
    print ">>> Going to drop demo site..."
    from invenio.textutils import wrap_text_in_a_box, wait_for_user
    wait_for_user(wrap_text_in_a_box("""WARNING: You are going to destroy your site and documents!"""))
    cli_cmd_drop_tables(conf)
    cli_cmd_create_tables(conf)
    cli_cmd_remove_demo_records(conf)
    print ">>> Demo site dropped successfully."

def cli_cmd_run_unit_tests(conf):
    """Run unit tests, usually on the working demo site."""
    from invenio.testutils import build_and_run_unit_test_suite
    if not build_and_run_unit_test_suite():
        sys.exit(1)

def cli_cmd_run_regression_tests(conf):
    """Run regression tests, usually on the working demo site."""
    from invenio.testutils import build_and_run_regression_test_suite
    if not build_and_run_regression_test_suite():
        sys.exit(1)

def cli_cmd_run_web_tests(conf):
    """Run web tests in a browser.  Requires Firefox with Selenium."""
    from invenio.testutils import build_and_run_web_test_suite
    if not build_and_run_web_test_suite():
        sys.exit(1)

def _detect_ip_address():
    """Detect IP address of this computer.  Useful for creating Apache
    vhost conf snippet on RHEL like machines.

    @return: IP address, or '*' if cannot detect
    @rtype: string

    @note: creates socket for real in order to detect real IP address,
    not the loopback one.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('invenio-software.org', 0))
        return s.getsockname()[0]
    except:
        return '*'

def cli_cmd_create_apache_conf(conf):
    """
    Create Apache conf files for this site, keeping previous
    files in a backup copy.
    """
    print ">>> Going to create Apache conf files..."
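    # The generated snippets are written under CFG_ETCDIR/apache/ and are
    # meant to be Include'd from the system-wide httpd.conf; the exact
    # Include lines are printed at the end of this function.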
    from invenio.textutils import wrap_text_in_a_box
    from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO
    apache_conf_dir = conf.get("Invenio", 'CFG_ETCDIR') + \
                      os.sep + 'apache'

    ## Preparation of XSendFile directive
    xsendfile_directive_needed = int(conf.get("Invenio", 'CFG_BIBDOCFILE_USE_XSENDFILE')) != 0
    if xsendfile_directive_needed:
        xsendfile_directive = "XSendFile On\n"
    else:
        xsendfile_directive = "#XSendFile On\n"
    for path in (conf.get('Invenio', 'CFG_WEBSUBMIT_FILEDIR'), # BibDocFile
                 conf.get('Invenio', 'CFG_WEBDIR'),
                 conf.get('Invenio', 'CFG_WEBSUBMIT_STORAGEDIR'), # WebSubmit
                 conf.get('Invenio', 'CFG_TMPDIR'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'tmp', 'attachfile'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'comments'),
                 os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'data', 'baskets', 'comments'),
                 '/tmp'): # BibExport
        if xsendfile_directive_needed:
            xsendfile_directive += '        XSendFilePath %s\n' % path
        else:
            xsendfile_directive += '        #XSendFilePath %s\n' % path
    xsendfile_directive = xsendfile_directive.strip()

    ## Preparation of deflate directive
    deflate_directive_needed = int(conf.get("Invenio", 'CFG_WEBSTYLE_HTTP_USE_COMPRESSION')) != 0
    if deflate_directive_needed:
        deflate_directive = r"""
        ## Configuration snippet taken from:
        ## <http://httpd.apache.org/docs/2.2/mod/mod_deflate.html>
        <Location />
         SetOutputFilter DEFLATE

         # Netscape 4.x has some problems...
         BrowserMatch ^Mozilla/4 gzip-only-text/html

         # Netscape 4.06-4.08 have some more problems
         BrowserMatch ^Mozilla/4\.0[678] no-gzip

         # MSIE masquerades as Netscape, but it is fine
         # BrowserMatch \bMSIE !no-gzip !gzip-only-text/html

         # NOTE: Due to a bug in mod_setenvif up to Apache 2.0.48
         # the above regex won't work. You can use the following
         # workaround to get the desired effect:
         BrowserMatch \bMSI[E] !no-gzip !gzip-only-text/html

         # Don't compress images
         SetEnvIfNoCase Request_URI \
          \.(?:gif|jpe?g|png)$ no-gzip dont-vary

         # Make sure proxies don't deliver the wrong content
         Header append Vary User-Agent env=!dont-vary
        </Location>
        """
    else:
        deflate_directive = ""

    if CFG_EXTERNAL_AUTH_USING_SSO:
        shibboleth_directive = r"""
        <Location /youraccount/login>
        SSLRequireSSL   # The modules only work using HTTPS
        AuthType shibboleth
        ShibRequireSession On
        ShibRequireAll On
        ShibExportAssertion Off
        require valid-user
        </Location>
        """
    else:
        shibboleth_directive = ""

    ## Apache vhost conf file is distro specific, so analyze needs:
    # Gentoo (and generic defaults):
    listen_directive_needed = True
    ssl_pem_directive_needed = False
    ssl_pem_path = '/etc/apache2/ssl/apache.pem'
    ssl_crt_path = '/etc/apache2/ssl/server.crt'
    ssl_key_path = '/etc/apache2/ssl/server.key'
    vhost_ip_address_needed = False
    wsgi_socket_directive_needed = False
    # Debian:
    if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'debian_version'):
        listen_directive_needed = False
        ssl_pem_directive_needed = True
    # RHEL/SLC:
    if os.path.exists(os.path.sep + 'etc' + os.path.sep + 'redhat-release'):
        listen_directive_needed = False
        ssl_crt_path = '/etc/pki/tls/certs/localhost.crt'
        ssl_key_path = '/etc/pki/tls/private/localhost.key'
        vhost_ip_address_needed = True
        wsgi_socket_directive_needed = True
    # maybe we are using non-standard ports?
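    # For instance, CFG_SITE_URL = http://your.site.com:8080 yields
    # vhost_site_url = 'your.site.com' and vhost_site_url_port = '8080'
    # below, which in turn forces an explicit Listen directive.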
    vhost_site_url = conf.get('Invenio', 'CFG_SITE_URL').replace("http://", "")
    if vhost_site_url.startswith("https://"):
        ## The installation is configured to require HTTPS for any connection
        vhost_site_url = vhost_site_url.replace("https://", "")
    vhost_site_url_port = '80'
    vhost_site_secure_url = conf.get('Invenio', 'CFG_SITE_SECURE_URL').replace("https://", "")
    vhost_site_secure_url_port = '443'
    if ':' in vhost_site_url:
        vhost_site_url, vhost_site_url_port = vhost_site_url.split(':', 1)
    if ':' in vhost_site_secure_url:
        vhost_site_secure_url, vhost_site_secure_url_port = vhost_site_secure_url.split(':', 1)
    if vhost_site_url_port != '80' or vhost_site_secure_url_port != '443':
        listen_directive_needed = True

    ## OK, let's create Apache vhost files:
    if not os.path.exists(apache_conf_dir):
        os.mkdir(apache_conf_dir)
    apache_vhost_file = apache_conf_dir + os.sep + \
                        'invenio-apache-vhost.conf'
    apache_vhost_ssl_file = apache_conf_dir + os.sep + \
                            'invenio-apache-vhost-ssl.conf'
    apache_vhost_body = """\
AddDefaultCharset UTF-8
ServerSignature Off
ServerTokens Prod
NameVirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s
%(listen_directive)s
%(wsgi_socket_directive)s
WSGIRestrictStdout Off
<Files *.pyc>
   deny from all
</Files>
<Files *~>
   deny from all
</Files>
<VirtualHost %(vhost_ip_address)s:%(vhost_site_url_port)s>
        ServerName %(servername)s
        ServerAlias %(serveralias)s
        ServerAdmin %(serveradmin)s
        DocumentRoot %(webdir)s
        <Directory %(webdir)s>
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        ErrorLog %(logdir)s/apache.err
        LogLevel warn
        LogFormat "%%h %%l %%u %%t \\"%%r\\" %%>s %%b \\"%%{Referer}i\\" \\"%%{User-agent}i\\" %%D" combined_with_timing
        CustomLog %(logdir)s/apache.log combined_with_timing
        DirectoryIndex index.en.html index.html
        Alias /img/ %(webdir)s/img/
        Alias /css/ %(webdir)s/css/
        Alias /js/ %(webdir)s/js/
        Alias /flash/ %(webdir)s/flash/
        Alias /export/ %(webdir)s/export/
        Alias /MathJax/ %(webdir)s/MathJax/
        Alias /jsCalendar/ %(webdir)s/jsCalendar/
        Alias /ckeditor/ %(webdir)s/ckeditor/
        Alias /mediaelement/ %(webdir)s/mediaelement/
        AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1
        Alias /robots.txt %(webdir)s/robots.txt
        Alias /favicon.ico %(webdir)s/favicon.ico
        WSGIDaemonProcess invenio processes=5 threads=1 display-name=%%{GROUP} inactivity-timeout=3600 maximum-requests=10000
        WSGIImportScript %(wsgidir)s/invenio.wsgi process-group=invenio application-group=%%{GLOBAL}
        WSGIScriptAlias / %(wsgidir)s/invenio.wsgi
        WSGIPassAuthorization On
        %(xsendfile_directive)s
        <Directory %(wsgidir)s>
           WSGIProcessGroup invenio
           WSGIApplicationGroup %%{GLOBAL}
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        %(deflate_directive)s
</VirtualHost>
""" % {'vhost_site_url_port': vhost_site_url_port,
       'servername': vhost_site_url,
       'serveralias': vhost_site_url.split('.')[0],
       'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'),
       'webdir': conf.get('Invenio', 'CFG_WEBDIR'),
       'logdir': conf.get('Invenio', 'CFG_LOGDIR'),
       'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'),
       'wsgidir': os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'),
       'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*',
       'listen_directive': listen_directive_needed and 'Listen ' + vhost_site_url_port or \
                           '#Listen ' + vhost_site_url_port,
       'wsgi_socket_directive': (wsgi_socket_directive_needed and \
                                 'WSGISocketPrefix ' or '#WSGISocketPrefix ') + \
                                conf.get('Invenio', 'CFG_PREFIX') + os.sep + 'var' + os.sep + 'run',
       'xsendfile_directive' : xsendfile_directive,
       'deflate_directive': deflate_directive,
       }
    apache_vhost_ssl_body = """\
ServerSignature Off
ServerTokens Prod
%(listen_directive)s
NameVirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s
%(ssl_pem_directive)s
%(ssl_crt_directive)s
%(ssl_key_directive)s
WSGIRestrictStdout Off
<Files *.pyc>
   deny from all
</Files>
<Files *~>
   deny from all
</Files>
<VirtualHost %(vhost_ip_address)s:%(vhost_site_secure_url_port)s>
        ServerName %(servername)s
        ServerAlias %(serveralias)s
        ServerAdmin %(serveradmin)s
        SSLEngine on
        DocumentRoot %(webdir)s
        <Directory %(webdir)s>
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        ErrorLog %(logdir)s/apache-ssl.err
        LogLevel warn
        LogFormat "%%h %%l %%u %%t \\"%%r\\" %%>s %%b \\"%%{Referer}i\\" \\"%%{User-agent}i\\" %%D" combined_with_timing
        CustomLog %(logdir)s/apache-ssl.log combined_with_timing
        DirectoryIndex index.en.html index.html
        Alias /img/ %(webdir)s/img/
        Alias /css/ %(webdir)s/css/
        Alias /js/ %(webdir)s/js/
        Alias /flash/ %(webdir)s/flash/
        Alias /export/ %(webdir)s/export/
        Alias /MathJax/ %(webdir)s/MathJax/
        Alias /jsCalendar/ %(webdir)s/jsCalendar/
        Alias /ckeditor/ %(webdir)s/ckeditor/
        Alias /mediaelement/ %(webdir)s/mediaelement/
        AliasMatch /sitemap-(.*) %(webdir)s/sitemap-$1
        Alias /robots.txt %(webdir)s/robots.txt
        Alias /favicon.ico %(webdir)s/favicon.ico
        RedirectMatch /sslredirect/(.*) http://$1
        WSGIScriptAlias / %(wsgidir)s/invenio.wsgi
        WSGIPassAuthorization On
        %(xsendfile_directive)s
        <Directory %(wsgidir)s>
           WSGIProcessGroup invenio
           WSGIApplicationGroup %%{GLOBAL}
           Options FollowSymLinks MultiViews
           AllowOverride None
           Order allow,deny
           Allow from all
        </Directory>
        %(deflate_directive)s
        %(shibboleth_directive)s
</VirtualHost>
""" % {'vhost_site_secure_url_port': vhost_site_secure_url_port,
       'servername': vhost_site_secure_url,
       'serveralias': vhost_site_secure_url.split('.')[0],
       'serveradmin': conf.get('Invenio', 'CFG_SITE_ADMIN_EMAIL'),
       'webdir': conf.get('Invenio', 'CFG_WEBDIR'),
       'logdir': conf.get('Invenio', 'CFG_LOGDIR'),
       'libdir' : conf.get('Invenio', 'CFG_PYLIBDIR'),
       'wsgidir' : os.path.join(conf.get('Invenio', 'CFG_PREFIX'), 'var', 'www-wsgi'),
       'vhost_ip_address': vhost_ip_address_needed and _detect_ip_address() or '*',
       'listen_directive' : listen_directive_needed and 'Listen ' + vhost_site_secure_url_port or \
                            '#Listen ' + vhost_site_secure_url_port,
       'ssl_pem_directive': ssl_pem_directive_needed and \
                            'SSLCertificateFile %s' % ssl_pem_path or \
                            '#SSLCertificateFile %s' % ssl_pem_path,
       'ssl_crt_directive': ssl_pem_directive_needed and \
                            '#SSLCertificateFile %s' % ssl_crt_path or \
                            'SSLCertificateFile %s' % ssl_crt_path,
       'ssl_key_directive': ssl_pem_directive_needed and \
                            '#SSLCertificateKeyFile %s' % ssl_key_path or \
                            'SSLCertificateKeyFile %s' % ssl_key_path,
       'xsendfile_directive' : xsendfile_directive,
       'deflate_directive': deflate_directive,
       'shibboleth_directive': shibboleth_directive,
       }
    # write HTTP vhost snippet:
    if os.path.exists(apache_vhost_file):
        shutil.copy(apache_vhost_file,
                    apache_vhost_file + '.OLD')
    fdesc = open(apache_vhost_file, 'w')
    fdesc.write(apache_vhost_body)
    fdesc.close()
    print
    print "Created file", apache_vhost_file
    # write HTTPS vhost snippet:
    vhost_ssl_created = False
    if conf.get('Invenio', 'CFG_SITE_SECURE_URL').startswith("https://"):
        if os.path.exists(apache_vhost_ssl_file):
            shutil.copy(apache_vhost_ssl_file,
                        apache_vhost_ssl_file + '.OLD')
        fdesc = open(apache_vhost_ssl_file, 'w')
        fdesc.write(apache_vhost_ssl_body)
        fdesc.close()
        vhost_ssl_created = True
        print "Created file", apache_vhost_ssl_file

    print wrap_text_in_a_box("""\
Apache virtual host configuration file(s) for your Invenio site
was(were) created.  Please check created file(s) and activate virtual
host(s).
For example, you can put the following include statements in your
httpd.conf:\n
Include %s

%s

Please see the INSTALL file for more details. """ % (apache_vhost_file,
    (vhost_ssl_created and 'Include ' or '#Include ') + apache_vhost_ssl_file))
    print ">>> Apache conf files created."

def cli_cmd_get(conf, varname):
    """
    Return value of VARNAME read from CONF files.  Useful for
    third-party programs to access values of conf options such as
    CFG_PREFIX.  Return None if VARNAME is not found.
    """
    # do not pay attention to upper/lower case:
    varname = varname.lower()
    # do not pay attention to section names yet:
    all_options = {}
    for section in conf.sections():
        for option in conf.options(section):
            all_options[option] = conf.get(section, option)
    return all_options.get(varname, None)

def cli_cmd_list(conf):
    """
    Print a list of all conf options and values from CONF.
    """
    sections = conf.sections()
    sections.sort()
    for section in sections:
        options = conf.options(section)
        options.sort()
        for option in options:
            print option.upper(), '=', conf.get(section, option)

def _grep_version_from_executable(path_to_exec, version_regexp):
    """
    Try to detect a program version by digging into its binary
    PATH_TO_EXEC and looking for VERSION_REGEXP.  Return program
    version as a string.  Return empty string if not succeeded.
    """
    from invenio.shellutils import run_shell_command
    exec_version = ""
    if os.path.exists(path_to_exec):
        dummy1, cmd2_out, dummy2 = run_shell_command("strings %s | grep %s",
                                                     (path_to_exec, version_regexp))
        if cmd2_out:
            for cmd2_out_line in cmd2_out.split("\n"):
                if len(cmd2_out_line) > len(exec_version):
                    # the longest the better:
                    exec_version = cmd2_out_line
    return exec_version

def detect_apache_version():
    """
    Try to detect Apache version by localizing httpd or apache
    executables and grepping inside binaries.  Return list of all
    found Apache versions and paths.  (For a given executable, the
    returned format is 'apache_version [apache_path]'.)  Return empty
    list if no success.
    """
    from invenio.shellutils import run_shell_command
    out = []
    dummy1, cmd_out, dummy2 = run_shell_command("locate bin/httpd bin/apache")
    for apache in cmd_out.split("\n"):
        apache_version = _grep_version_from_executable(apache, '^Apache\/')
        if apache_version:
            out.append("%s [%s]" % (apache_version, apache))
    return out

def cli_cmd_detect_system_details(conf):
    """
    Detect and print system details such as Apache/Python/MySQL
    versions etc.  Useful for debugging problems on various OS.
    """
    import MySQLdb
    print ">>> Going to detect system details..."
    print "* Hostname: " + socket.gethostname()
    print "* Invenio version: " + conf.get("Invenio", "CFG_VERSION")
    print "* Python version: " + sys.version.replace("\n", " ")
    print "* Apache version: " + ";\n ".join(detect_apache_version())
    print "* MySQLdb version: " + MySQLdb.__version__
    try:
        from invenio.dbquery import run_sql
        print "* MySQL version:"
        for key, val in run_sql("SHOW VARIABLES LIKE 'version%'") + \
                        run_sql("SHOW VARIABLES LIKE 'charact%'") + \
                        run_sql("SHOW VARIABLES LIKE 'collat%'"):
            if key in ['version',
                       'character_set_client',
                       'character_set_connection',
                       'character_set_database',
                       'character_set_results',
                       'character_set_server',
                       'character_set_system',
                       'collation_connection',
                       'collation_database',
                       'collation_server']:
                print "  - %s: %s" % (key, val)
    except ImportError:
        print "* ERROR: cannot import dbquery"
    print ">>> System details detected successfully."
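## Illustrative usage of cli_cmd_get() from the command line (the printed
## value depends, of course, on your local configuration):
##
##   $ inveniocfg --get CFG_PREFIX
##   /opt/invenio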
def main():
    """Main entry point."""
    conf = ConfigParser()
    if '--help' in sys.argv or \
       '-h' in sys.argv:
        print_usage()
    elif '--version' in sys.argv or \
         '-V' in sys.argv:
        print_version()
    else:
        confdir = None
        if '--conf-dir' in sys.argv:
            try:
                confdir = sys.argv[sys.argv.index('--conf-dir') + 1]
            except IndexError:
                pass # missing --conf-dir argument value
            if confdir is None or not os.path.exists(confdir):
                print "ERROR: bad or missing --conf-dir option value."
                sys.exit(1)
        else:
            ## try to detect path to conf dir (relative to this bin dir):
            confdir = re.sub(r'/bin$', '/etc', sys.path[0])

        ## read conf files:
        for conffile in [confdir + os.sep + 'invenio.conf',
                         confdir + os.sep + 'invenio-autotools.conf',
                         confdir + os.sep + 'invenio-local.conf',]:
            if os.path.exists(conffile):
                conf.read(conffile)
            else:
                if not conffile.endswith("invenio-local.conf"):
                    # invenio-local.conf is optional, otherwise stop
                    print "ERROR: Badly guessed conf file location", conffile
                    print "(Please use --conf-dir option.)"
                    sys.exit(1)

        ## decide what to do:
        done = False
        for opt_idx in range(0, len(sys.argv)):
            opt = sys.argv[opt_idx]
            if opt == '--conf-dir':
                # already treated before, so skip silently:
                pass
            elif opt == '--get':
                try:
                    varname = sys.argv[opt_idx + 1]
                except IndexError:
                    print "ERROR: bad or missing --get option value."
                    sys.exit(1)
                if varname.startswith('-'):
                    print "ERROR: bad or missing --get option value."
                    sys.exit(1)
                varvalue = cli_cmd_get(conf, varname)
                if varvalue is not None:
                    print varvalue
                else:
                    sys.exit(1)
                done = True
            elif opt == '--list':
                cli_cmd_list(conf)
                done = True
            elif opt == '--detect-system-details':
                cli_cmd_detect_system_details(conf)
                done = True
            elif opt == '--create-tables':
                cli_cmd_create_tables(conf)
                done = True
            elif opt == '--load-webstat-conf':
                cli_cmd_load_webstat_conf(conf)
                done = True
            elif opt == '--drop-tables':
                cli_cmd_drop_tables(conf)
                done = True
            elif opt == '--check-openoffice':
                cli_check_openoffice(conf)
                done = True
            elif opt == '--create-demo-site':
                cli_cmd_create_demo_site(conf)
                done = True
            elif opt == '--load-demo-records':
                cli_cmd_load_demo_records(conf)
                done = True
            elif opt == '--remove-demo-records':
                cli_cmd_remove_demo_records(conf)
                done = True
            elif opt == '--drop-demo-site':
                cli_cmd_drop_demo_site(conf)
                done = True
            elif opt == '--run-unit-tests':
                cli_cmd_run_unit_tests(conf)
                done = True
            elif opt == '--run-regression-tests':
                cli_cmd_run_regression_tests(conf)
                done = True
            elif opt == '--run-web-tests':
                cli_cmd_run_web_tests(conf)
                done = True
            elif opt == '--update-all':
                cli_cmd_update_config_py(conf)
                cli_cmd_update_dbquery_py(conf)
                cli_cmd_update_dbexec(conf)
                cli_cmd_update_bibconvert_tpl(conf)
                cli_cmd_update_web_tests(conf)
                done = True
            elif opt == '--update-config-py':
                cli_cmd_update_config_py(conf)
                done = True
            elif opt == '--update-dbquery-py':
                cli_cmd_update_dbquery_py(conf)
                done = True
            elif opt == '--update-dbexec':
                cli_cmd_update_dbexec(conf)
                done = True
            elif opt == '--update-bibconvert-tpl':
                cli_cmd_update_bibconvert_tpl(conf)
                done = True
            elif opt == '--update-web-tests':
                cli_cmd_update_web_tests(conf)
                done = True
            elif opt == '--reset-all':
                cli_cmd_reset_sitename(conf)
                cli_cmd_reset_siteadminemail(conf)
                cli_cmd_reset_fieldnames(conf)
                cli_cmd_reset_recstruct_cache(conf)
                done = True
            elif opt == '--reset-sitename':
                cli_cmd_reset_sitename(conf)
                done = True
            elif opt == '--reset-siteadminemail':
                cli_cmd_reset_siteadminemail(conf)
                done = True
            elif opt == '--reset-fieldnames':
                cli_cmd_reset_fieldnames(conf)
                done = True
            elif opt == '--reset-recstruct-cache':
                cli_cmd_reset_recstruct_cache(conf)
                done = True
            elif opt == '--create-apache-conf':
                cli_cmd_create_apache_conf(conf)
                done = True
            elif opt.startswith("-") and opt != '--yes-i-know':
                print "ERROR: unknown option", opt
                sys.exit(1)
        if not done:
            print """ERROR: Please specify a command.  Please see '--help'."""
            sys.exit(1)

if __name__ == '__main__':
    main()
diff --git a/modules/miscutil/lib/plotextractor_getter.py b/modules/miscutil/lib/plotextractor_getter.py
index f86f0cac1..9221bf5d4 100644
--- a/modules/miscutil/lib/plotextractor_getter.py
+++ b/modules/miscutil/lib/plotextractor_getter.py
@@ -1,630 +1,633 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

import urllib2, time, os, sys, re
from invenio.config import CFG_TMPDIR, \
                           CFG_PLOTEXTRACTOR_SOURCE_BASE_URL, \
                           CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER, \
                           CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER, \
                           CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT
from invenio.plotextractor_config import CFG_PLOTEXTRACTOR_DESY_BASE, \
                                         CFG_PLOTEXTRACTOR_DESY_PIECE
from invenio.search_engine import get_record
from invenio.bibrecord import record_get_field_instances, \
                              field_get_subfield_values
from invenio.shellutils import run_shell_command
from invenio.plotextractor_output_utils import write_message
+from invenio.urlutils import make_invenio_opener
+
+PLOTEXTRACTOR_OPENER = make_invenio_opener('plotextractor')

PDF_EXTENSION = '.pdf'

ARXIV_HEADER = 'arXiv:'
HEP_EX = ['hep-ex/', 9405, ARXIV_HEADER + 'hep-ex_'] # experimental
# a note about hep-ex: the hep-ex papers from 9403 and 9404 are stored
# in arXiv's servers as hep-ph
HEP_LAT = ['hep-lat/', 9107, ARXIV_HEADER + 'hep-lat_'] # lattice
HEP_PH = ['hep-ph/', 9203, ARXIV_HEADER + 'hep-ph_'] # phenomenology
HEP_TH = ['hep-th/', 9108, ARXIV_HEADER + 'hep-th_'] # theory

HEP_AREAS = [HEP_EX, HEP_LAT, HEP_PH, HEP_TH]
URL = 0
BEGIN_YEAR_MONTH_INDEX = 1
AREA_STRING_INDEX = 2

URL_MOVE = int('0704')
CENTURY_END = int('9912')
CENTURY_BEGIN = int('0001')
ARBITRARY_FROM_DATE = int('9101')
FIX_FOR_YEAR_END = 88
current_yearmonth = int(('%02d%02d' % (time.localtime().tm_year, \
                                       time.localtime().tm_mon))[2:])

"""
each of the areas of hep began in a different year and month.  beginning in 0704, i.e.
April 2007, arXiv moved its URLS from
    ARXIV_BASE + E_PRINT + HEP_AREA + <number>
to
    ARXIV_BASE + E_PRINT + <number>
the papers for a given month are numbered between yymm.0001 and yymm.9999
after the URL move, and before that they are between yymm001 and yymm999
"""

help_param = 'help'
dir_param = 'dir'
from_param = 'from'
from_index_param = 'fromindex'
ref_file_param = 'reffile'
single_param = 'single'
param_abbrs = 'hd:f:i:r:s:'
params = [help_param, dir_param + '=', from_param + '=', from_index_param + '=', \
          ref_file_param + '=', single_param + '=']

def harvest(to_dir, from_date, from_index):
    """
    Calls upon arXiv using URLS as described above in order to grab
    all the tarballs from HEP areas.

    @param: to_dir (string): the directory where everything that gets
        downloaded will sit
    @param: from_date (int): the date from which we would like to harvest,
        in YYMM format
    @param: from_index (int): the index where we want to begin our harvest
        in YYMM.  i.e. we want to start with the 345th record in 1002.

    @output: TONS OF .tar.gz FILES FROM ARXIV
    @return: (none)
    """
    global current_yearmonth
    if from_date > current_yearmonth and from_date < ARBITRARY_FROM_DATE:
        write_message('Please choose a from date that is not in the future!')
        sys.exit(1)
    if from_date % 100 > 12:
        write_message('Please choose a from date in the form YYMM')
        sys.exit(1)

    if from_date >= ARBITRARY_FROM_DATE or from_date < URL_MOVE:
        for area in HEP_AREAS:
            yearmonthindex = area[BEGIN_YEAR_MONTH_INDEX]

            # nasty casing!
            # I find this particularly horrid because we have to wrap dates..
            # i.e. although 9901 is more than 0001, we might want things in
            # 0001 and not from 9901
            if from_date < current_yearmonth:
                # we want to start in the new century; skip the while below
                yearmonthindex = CENTURY_END
            elif from_date < CENTURY_END:
                yearmonthindex = from_date

            # grab stuff from between 92 and 99
            old_URL_harvest(yearmonthindex, CENTURY_END, to_dir, area)

            yearmonthindex = CENTURY_BEGIN

            # more nasty casing
            if from_date < URL_MOVE:
                # that means we want to start sometime before the weird
                # url change
                yearmonthindex = from_date
            elif from_date > URL_MOVE and from_date < ARBITRARY_FROM_DATE:
                # we don't want to start yet
                yearmonthindex = URL_MOVE

            # grab stuff from between 00 and 07
            old_URL_harvest(yearmonthindex, URL_MOVE, to_dir, area)

    # also after the URL move, there was no distinction between
    # papers from different areas.  hence, outside the for loop

    # even more nasty casing!
    if from_date < current_yearmonth and from_date > URL_MOVE:
        # we want to start someplace after the URL move and before now
        yearmonthindex = from_date
    else:
        yearmonthindex = URL_MOVE

    # grab stuff from between 07 and today
    new_URL_harvest(yearmonthindex, from_index, to_dir)

def make_single_directory(to_dir, dirname):
    """
    Makes a subdirectory for the arXiv record we are working with and
    returns its exact location.

    @param: to_dir (string): the name of the directory we want to make it in
    @param: dirname (string): the name of the directory we want to create

    @output: a new directory called dirname located in to_dir
    @return: the absolute path to the new directory
    """
    new_dir = os.path.join(to_dir, dirname)
    if not os.path.isdir(new_dir):
        try:
            os.mkdir(new_dir)
        except OSError:
            write_message('Failed to make new dir...')
            return to_dir
    return new_dir

def make_useful_directories(yearmonthindex, to_dir):
    """
    Builds up the hierarchical filestructure for saving these things
    in a useful way.
    @param: yearmonthindex (int): YYMM
    @param: to_dir (string): where we want to build the directories from

    @return month_dir (string): the new directory we are going to put stuff in
    """
    year = yearmonthindex / 100
    if year >= (ARBITRARY_FROM_DATE / 100):
        year = '19%02d' % year
    else:
        year = '20%02d' % year
    month = '%02d' % (yearmonthindex % 100)
    year_dir = os.path.join(to_dir, year)
    if not os.path.isdir(year_dir):
        os.mkdir(year_dir)
    month_dir = os.path.join(year_dir, month)
    if not os.path.isdir(month_dir):
        os.mkdir(month_dir)
    return month_dir

def get_list_of_all_matching_files(basedir, filetypes):
    """
    This function uses the os module in order to crawl through the
    directory tree rooted at basedir and find all the files therein
    that include filetype in their 'file' output.  Returns a list of
    absolute paths to all files.

    @param: basedir (string): the directory where we want to start crawling
    @param: filetypes ([string, string]): something that will be contained
        in the output of running 'file' on the types of files we're
        looking for

    @return: file_paths ([string, string, ...]): a list of full paths to
        the files that we discovered
    """
    file_paths = []
    for dirpath, dummy0, filenames in os.walk(basedir):
        for filename in filenames:
            full_path = os.path.join(dirpath, filename)
            dummy1, cmd_out, dummy2 = run_shell_command('file %s', (full_path,))
            for filetype in filetypes:
                if cmd_out.find(filetype) > -1:
                    file_paths.append(full_path)
    return file_paths

def tarballs_by_recids(recids, sdir):
    """
    Take a string representing one recid or several and get the associated
    tarballs for those ids.

    @param: recids (string): the record id or ids
    @param: sdir (string): where the tarballs should live

    @return: tarballs ([string, string, ...]): locations of tarballs
    """
    list_of_ids = []
    if ',' in recids:
        recids = recids.split(',')
        for recid in recids:
            if '-' in recid:
                low, high = recid.split('-')
                recid = range(int(low), int(high))
                list_of_ids.extend(recid)
            else:
                recid = int(recid)
                list_of_ids.append(recid)
    else:
        if '-' in recids:
            low, high = recids.split('-')
            list_of_ids = range(int(low), int(high))
        else:
            list_of_ids = [int(recids)]

    arXiv_ids = []
    for recid in list_of_ids:
        rec = get_record(recid)
        for afieldinstance in record_get_field_instances(rec, tag='037'):
            if 'arXiv' == field_get_subfield_values(afieldinstance, '9')[0]:
                arXiv_id = field_get_subfield_values(afieldinstance, 'a')[0]
                arXiv_ids.append(arXiv_id)
    return tarballs_by_arXiv_id(arXiv_ids, sdir)

def tarballs_by_arXiv_id(arXiv_ids, sdir):
    """
    Takes a list of arXiv ids and downloads their tarballs
    and returns a list of the tarballs' locations.

    @param: arXiv_ids ([string, string, ...]): the arXiv ids you
        would like to have tarballs for
    @param: sdir (string): the place to download these tarballs to

    @return: tarballs ([string, ...]): a list of the tarballs downloaded
    """
    tarballs = []
    for arXiv_id in arXiv_ids:
        if 'arXiv' not in arXiv_id:
            arXiv_id = 'arXiv:' + arXiv_id
        tarball, dummy_pdf = harvest_single(arXiv_id, sdir, ("tarball",))
        if tarball != None:
            tarballs.append(tarball)
        time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
    return tarballs

def parse_and_download(infile, sdir):
    """
    Read the information in the input file and download the corresponding
    tarballs from arxiv.
    @param: infile (string): the name of the file to parse
    @param: sdir (string): where to put the downloaded tarballs
    """
    tarfiles = []
    tardir = os.path.join(sdir, 'tarballs')
    if not os.path.isdir(tardir):
        try:
            os.makedirs(tardir)
        except:
            write_message(sys.exc_info()[0])
            write_message('files will be loose, not in ' + tardir)
            tardir = sdir
    infile = open(infile)
    for line in infile.readlines():
        line = line.strip()
        if line.startswith('http://'):
            # hurray!
            url = line
            filename = url.split('/')[-1]
            if not download(url, filename, tardir):
                write_message(filename + ' may already exist')
                write_message(sys.exc_info()[0])
            filename = os.path.join(tardir, filename)
            tarfiles.append(filename)
            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT) # be nice!
        elif line.startswith('arXiv'):
            tarfiles.extend(tarballs_by_arXiv_id([line.strip()], sdir))
    return tarfiles

def harvest_single(single, to_dir, selection=("tarball", "pdf")):
    """
    if we only want to harvest one id (arXiv or DESY), we can use this.

    @param: single (string): an id from arXiv or DESY
    @param: to_dir (string): where the output should be saved
    @param: selection (tuple): which outputs to fetch, "tarball", "pdf" or both

    @output: the PDF and source tarball (if applicable) of this single record

    @return: (tarball, pdf): the location of the source tarball and PDF, None
        if not found
    """
    if single.find('arXiv') > -1 and 'arxiv.org' in CFG_PLOTEXTRACTOR_SOURCE_BASE_URL.lower():
        id_str = re.findall('[a-zA-Z\\-]+/\\d+|\\d+\\.\\d+', single)[0]
        idno = id_str.split('/')
        if len(idno) > 0:
            idno = idno[-1]
        yymm = int(idno[:4])
        yymm_dir = make_useful_directories(yymm, to_dir)
        url_for_file = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + \
                       CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER + \
                       id_str
        url_for_pdf = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + \
                      CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER + \
                      id_str + '.pdf' # adds '.pdf' to avoid arXiv internal redirect from arXivID to arXivID.pdf
        individual_file = 'arXiv:' + id_str.replace('/', '_')
        individual_dir = make_single_directory(yymm_dir, individual_file)
        abs_path = os.path.join(individual_dir, individual_file)
        tarball = abs_path
        pdf = abs_path + '.pdf'
        write_message('download ' + url_for_file + ' to ' + abs_path)
        if "tarball" in selection and not download(url_for_file, individual_file, individual_dir):
            write_message('download of tarball failed/skipped')
            tarball = None
        if "pdf" in selection and not download(url_for_pdf, individual_file + '.pdf', individual_dir):
            write_message('download of pdf failed/skipped')
            pdf = None
        return (tarball, pdf)
    elif single.find('arXiv') > -1 and CFG_PLOTEXTRACTOR_SOURCE_BASE_URL != '':
        # hmm... is it a filesystem?
        if CFG_PLOTEXTRACTOR_SOURCE_BASE_URL.startswith('/'):
            if not os.path.exists(CFG_PLOTEXTRACTOR_SOURCE_BASE_URL):
                write_message('PROBLEM WITH CFG_PLOTEXTRACTOR_SOURCE_BASE_URL: we cannot ' + \
                              'find this folder!')
                return (None, None)
            for root, dummy, files in os.walk(CFG_PLOTEXTRACTOR_SOURCE_BASE_URL):
                for file_name in files:
                    id_no = single.replace('arXiv', '')
                    if file_name.find(id_no) > -1 or\
                       file_name.find(id_no.replace('/', '_')) > -1 or\
                       file_name.find(id_no.replace('_', '/')) > -1 or\
                       file_name.find(id_no.replace(':', '')) > -1:
                        # that's our file!  probably.
                        return (os.path.join(root, file_name), None)
            # well, no luck there
            return (None, None)
        # okay... is it... a website?
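        # CFG_PLOTEXTRACTOR_SOURCE_BASE_URL therefore dispatches three
        # ways: arxiv.org itself (handled above), a local folder (just
        # probed), or, below, some other http(s) mirror of the tarballs.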
        elif CFG_PLOTEXTRACTOR_SOURCE_BASE_URL.startswith('http') and "tarball" in selection:
            url_for_file = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + single
            individual_file = os.path.join(to_dir, single)
            download(url_for_file, individual_file, to_dir)
            return (individual_file, None)
        else:
            # well, I don't know what to do with it
            write_message('unsure how to handle CFG_PLOTEXTRACTOR_SOURCE_BASE_URL. ' + \
                          'please fix the harvest_single function in ' + \
                          'miscutil/lib/plotextractor_getter.py')
            return (None, None)
    elif single.find('DESY') > -1 and "pdf" in selection:
        # also okay!
        idno = re.findall('\\d{2,4}-\\d{3}', single)[0]
        year, number = idno.split('-')
        if len(year) < 4:
            if int(year) > 92:
                year = '19' + year
            else:
                year = '20' + year
        year_dir = make_single_directory(to_dir, year)
        desy_dir = make_single_directory(year_dir, 'DESY')
        individual_dir = make_single_directory(desy_dir, number)
        id_no = year[2:] + '-' + number + '.pdf'
        url_for_file = CFG_PLOTEXTRACTOR_DESY_BASE + year + \
                       CFG_PLOTEXTRACTOR_DESY_PIECE + id_no
        individual_file = id_no
        write_message('download ' + url_for_file + ' to ' + \
                      os.path.join(individual_dir, individual_file))
        download(url_for_file, individual_file, individual_dir)
        return (None, individual_file)
    write_message('END')
    return (None, None)

def src_pdf_from_marc(marc_file):
    """
    Given a marc file, this function attempts to determine where to find
    a pdf for that record

    @param: marc_file (string): the location of a marc file we can look at

    @return: pdfloc (string): the location of the downloaded PDF source
        file, None if no pdf was downloaded
    """
    if not os.path.exists(marc_file):
        return None
    marc_file = open(marc_file)
    marc_text = marc_file.read()
    marc_file.close()
    arXiv_match = '(([a-zA-Z\\-]+/\\d{7})|(\\d{4}\\.\\d{4}))'
    DESY_match = 'DESY-\\d{2,4}-\\d{3}'
    pdf_loc = None
    to_dir = os.path.join(CFG_TMPDIR, 'plotdata')
    possible_match = re.search(arXiv_match, marc_text)
    if possible_match != None:
        # it's listed on arXiv, hooray!
        arXiv_id = possible_match.group(0)
        dummy1, pdf_loc = harvest_single(arXiv_id, to_dir, ("pdf",))
    possible_match = re.search(DESY_match, marc_text)
    if possible_match != None:
        # it's listed on DESY, hooray!
        desy_id = possible_match.group(0)
        dummy1, pdf_loc = harvest_single(desy_id, to_dir, ("pdf",))
    return pdf_loc

def harvest_from_file(filename, to_dir):
    """
    Harvest from the file Tibor made.  Format of a single entry:
    oai:arXiv.org:area/YYMMIII or oai:arXiv.org:YYMM.IIII
    """
    ok_format = '^oai:arXiv.org:(([a-zA-Z\\-]+/\\d+)|(\\d+\\.\\d+))$'
    try:
        names_file = open(filename)
        for arXiv_name in names_file.readlines():
            if re.match(ok_format, arXiv_name) == None:
                write_message('error on ' + arXiv_name + '.  continuing.')
                continue
            harvest_single(arXiv_name, to_dir)
            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
    except IOError:
        write_message('Something is wrong with the file!')

def old_URL_harvest(from_date, to_date, to_dir, area):
    """
    Grab all the PDFs and tarballs off arXiv between from_date and
    to_date, where from_date and to_date are in YYMM form, and put
    them in their own separate folders inside of to_dir.
    Folder hierarchy will be
        to_dir/YYYY/MM/arXiv_id/stuff_downloaded_from_arXiv
    this obeys the old URL format

    @param: from_date (int): YYMM form of the date where we want to start
        harvesting
    @param: to_date (int): YYMM form of the date where we want to stop
        harvesting
    @param: to_dir (string): the base directory to put all these subdirs in
    @param: area (list): the entry of the HEP_AREAS array for the area we
        are currently working on downloading

    @output: PDFs and tarballs from arXiv in a hierarchy rooted at to_dir
    @return: None
    """
    yearmonthindex = from_date
    while yearmonthindex < to_date:
        sub_dir = make_useful_directories(yearmonthindex, to_dir)
        for paperindex in range(1, 1000):
            # for whatever reason, we can't count on these things to
            # start at 1 (in HEP_PH from 9403 to CENTURY_END only).
            # they start at frickin 202.
            #if area == HEP_PH and yearmonthindex < ARBITRARY_FROM_INDEX:
            #    paperindex = paperindex + 201

            # of note: before the URL change happened in 0704, it was
            # also the case that the paper numbers only had 3 digits
            next_to_harvest = '%04d%03d' % (yearmonthindex, paperindex)
            arXiv_id = area[AREA_STRING_INDEX] + next_to_harvest
            individual_dir = make_single_directory(sub_dir, arXiv_id)
            full_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER + \
                       area[URL] + next_to_harvest
            if not download(full_url, \
                            area[AREA_STRING_INDEX] + next_to_harvest, individual_dir):
                break
            full_pdf_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER + \
                           area[URL] + next_to_harvest
            download(full_pdf_url, \
                     area[AREA_STRING_INDEX] + next_to_harvest + PDF_EXTENSION, \
                     individual_dir)
            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
        if yearmonthindex % 100 == 12:
            # we reached the end of the year!
            yearmonthindex = yearmonthindex + FIX_FOR_YEAR_END
        yearmonthindex = yearmonthindex + 1

def new_URL_harvest(from_date, from_index, to_dir):
    """
    Grab all the PDFs and tarballs off arXiv from from_date up to the
    current month, where from_date is in YYMM form, and put them in
    their own separate folders inside of to_dir.  Folder hierarchy
    will be
        to_dir/YYYY/MM/arXiv_id/stuff_downloaded_from_arXiv
    this obeys the new URL format

    @param: from_date (int): YYMM form of the date where we want to start
        harvesting
    @param: from_index (int): the paper index to start harvesting from in
        the starting month
    @param: to_dir (string): the base directory to put all these subdirs in

    @output: PDFs and tarballs from arXiv in a hierarchy rooted at to_dir
    @return: None
    """
    global current_yearmonth
    yearmonthindex = from_date
    while yearmonthindex < current_yearmonth:
        if yearmonthindex == from_date:
            fro = from_index
        else:
            fro = 1
        sub_dir = make_useful_directories(yearmonthindex, to_dir)
        for paperindex in range(fro, 10000):
            # of note: after the URL change happened in 0704, it was
            # the case that paper numbers had 4 digits
            next_to_harvest = '%04d.%04d' % (yearmonthindex, paperindex)
            arXiv_id = ARXIV_HEADER + next_to_harvest
            individual_dir = make_single_directory(sub_dir, arXiv_id)
            full_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER + \
                       next_to_harvest
            if not download(full_url, ARXIV_HEADER + next_to_harvest, \
                            individual_dir):
                break
            full_pdf_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER + \
                           next_to_harvest
            download(full_pdf_url, \
                     ARXIV_HEADER + next_to_harvest + PDF_EXTENSION, \
                     individual_dir)
            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT) # be nice to remote server
        if yearmonthindex % 100 == 12:
            # we reached the end of the year!
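            # e.g. 0712 + FIX_FOR_YEAR_END (88) + the regular increment
            # below = 0801, i.e. adding 88 rolls YY12 over to (YY+1)01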
            yearmonthindex = yearmonthindex + FIX_FOR_YEAR_END
        yearmonthindex = yearmonthindex + 1

def download(url, filename, to_dir):
    """
    Actually does the call and download given a URL and desired output
    filename.

    @param: url (string): where the file lives on the interwebs
    @param: filename (string): where the file should live after download
    @param: to_dir (string): the dir where our new files will live

    @output: a file in to_dir
    @return: True on success, False on failure
    """
    new_file = os.path.join(to_dir, filename)
    try:
-        conn = urllib2.urlopen(url)
+        conn = PLOTEXTRACTOR_OPENER.open(url)
        response = conn.read()
        conn.close()
        new_file_fd = open(new_file, 'w')
        new_file_fd.write(response)
        new_file_fd.close()
        write_message('Downloaded to ' + new_file)
        return True
    except (IOError, urllib2.URLError), e:
        # this could be a permissions error, but it probably means that
        # there's nothing left in that section YYMM
        write_message('Error downloading from %s: \n%s\n' % (url, str(e)))
        return False
diff --git a/modules/miscutil/lib/solrutils.py b/modules/miscutil/lib/solrutils.py
index e2c5ea20a..94485de00 100644
--- a/modules/miscutil/lib/solrutils.py
+++ b/modules/miscutil/lib/solrutils.py
@@ -1,60 +1,63 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
Solr utilities.
""" import urllib2 import urllib import mimetools from invenio import intbitset +from invenio.urlutils import make_invenio_opener from invenio.jsonutils import json +SOLRUTILS_OPENER = make_invenio_opener('solrutils') + def solr_get_facets(bitset, solr_url): facet_query_url = "%s/invenio_facets" % solr_url # now use the bitset to fetch the facet data r = urllib2.Request(facet_query_url) data = bitset.fastdump() boundary = mimetools.choose_boundary() # fool solr into thinking we're uploading a file so it will read our data as a stream contents = '--%s\r\n' % boundary contents += 'Content-Disposition: form-data; name="bitset"; filename="bitset"\r\n' contents += 'Content-Type: application/octet-stream\r\n' contents += '\r\n' + data + '\r\n' contents += '--%s--\r\n\r\n' % boundary r.add_data(contents) contenttype = 'multipart/form-data; boundary=%s' % boundary r.add_unredirected_header('Content-Type', contenttype) # post the request and get back the facets as json - u = urllib2.urlopen(r) + u = SOLRUTILS_OPENER.open(r) return json.load(u) def solr_get_bitset(query, solr_url): invenio_query_url = "%s/select?qt=invenio_query&q=fulltext:%s" % (solr_url, urllib.quote(query)) # query to get a bitset bitset = intbitset.intbitset() - u = urllib2.urlopen(invenio_query_url) + u = SOLRUTILS_OPENER.open(invenio_query_url) data = u.read() bitset.fastload(data) return bitset diff --git a/modules/miscutil/lib/urlutils.py b/modules/miscutil/lib/urlutils.py index 7b87d50f5..339818b37 100644 --- a/modules/miscutil/lib/urlutils.py +++ b/modules/miscutil/lib/urlutils.py @@ -1,732 +1,792 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ urlutils.py -- helper functions for URL related problems such as argument washing, redirection, etc. """ __revision__ = "$Id$" import time import base64 import hmac import re import sys import os import inspect import urllib -from urllib import urlencode, quote_plus, quote +import urllib2 +from urllib import urlencode, quote_plus, quote, FancyURLopener from urlparse import urlparse from cgi import parse_qs, escape from md5 import md5 try: import BeautifulSoup BEAUTIFUL_SOUP_IMPORTED = True except ImportError: BEAUTIFUL_SOUP_IMPORTED = False try: from hashlib import sha256, sha1 HASHLIB_IMPORTED = True except ImportError: HASHLIB_IMPORTED = False from invenio import webinterface_handler_config as apache from invenio.config import \ CFG_SITE_URL, \ CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE, \ - CFG_WEBDIR + CFG_WEBDIR, CFG_SITE_NAME, CFG_VERSION def wash_url_argument(var, new_type): """ Wash argument into 'new_type', that can be 'list', 'str', 'int', 'tuple' or 'dict'. If needed, the check 'type(var) is not None' should be done before calling this function. 
@param var: variable value @param new_type: variable type, 'list', 'str', 'int', 'tuple' or 'dict' @return: as much as possible, value var as type new_type If var is a list, will change first element into new_type. If int check unsuccessful, returns 0 """ out = [] if new_type == 'list': # return lst if isinstance(var, list): out = var else: out = [var] elif new_type == 'str': # return str if isinstance(var, list): try: out = "%s" % var[0] except: out = "" elif isinstance(var, str): out = var else: out = "%s" % var elif new_type == 'int': # return int if isinstance(var, list): try: out = int(var[0]) except: out = 0 elif isinstance(var, (int, long)): out = var elif isinstance(var, str): try: out = int(var) except: out = 0 else: out = 0 elif new_type == 'tuple': # return tuple if isinstance(var, tuple): out = var else: out = (var, ) elif new_type == 'dict': # return dictionary if isinstance(var, dict): out = var else: out = {0: var} return out def redirect_to_url(req, url, redirection_type=None, norobot=False): """ Redirect current page to url. @param req: request as received from apache @param url: url to redirect to @param redirection_type: what kind of redirection is required: e.g.: apache.HTTP_MULTIPLE_CHOICES = 300 apache.HTTP_MOVED_PERMANENTLY = 301 apache.HTTP_MOVED_TEMPORARILY = 302 apache.HTTP_SEE_OTHER = 303 apache.HTTP_NOT_MODIFIED = 304 apache.HTTP_USE_PROXY = 305 apache.HTTP_TEMPORARY_REDIRECT = 307 The default is apache.HTTP_MOVED_TEMPORARILY @param norobot: wether to instruct crawlers and robots such as GoogleBot not to index past this point. @see: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3 """ if redirection_type is None: redirection_type = apache.HTTP_MOVED_TEMPORARILY - req.headers_out["Location"] = url del req.headers_out["Cache-Control"] req.headers_out["Cache-Control"] = "no-cache, private, no-store, " \ "must-revalidate, post-check=0, pre-check=0, max-age=0" req.headers_out["Pragma"] = "no-cache" if norobot: req.headers_out["X-Robots-Tag"] = "noarchive, nosnippet, noindex, nocache" + user_agent = req.headers_in.get('User-Agent') + if 'Microsoft Office Existence Discovery' in user_agent or 'ms-office' in user_agent: + ## HACK: this is to workaround Microsoft Office trying to be smart + ## when users click on URLs in Office documents that require + ## authentication. Office will check the validity of the URL + ## but will pass the browser the redirected URL rather than + ## the original one. This is incompatible with e.g. Shibboleth + ## based SSO since the referer would be lost. + ## See: http://support.microsoft.com/kb/899927 + req.status = 200 + req.content_type = 'text/html' + if req.method != 'HEAD': + req.write(""" + + + Intermediate page for URLs clicked on MS Office Documents + + + +

You are going to be redirected to the desired content within 5 seconds. If the redirection does not happen automatically, please click on %(url_ok)s.

+ +""" % { + 'url': escape(req.unparsed_uri, True), + 'url_ok': escape(req.unparsed_uri) + }) + raise apache.SERVER_RETURN(apache.DONE) + + req.headers_out["Location"] = url + if req.response_sent_p: raise IOError("Cannot redirect after headers have already been sent.") req.status = redirection_type req.write('

Please go to <a href="%s">here</a>

\n' % url) raise apache.SERVER_RETURN, apache.DONE def get_referer(req, replace_ampersands=False): """ Return the referring page of a request. Referer (wikipedia): Referer is a common misspelling of the word "referrer"; so common, in fact, that it made it into the official specification of HTTP. When visiting a webpage, the referer or referring page is the URL of the previous webpage from which a link was followed. @param req: request @param replace_ampersands: if 1, replace & by & in url (correct HTML cannot contain & characters alone) """ try: referer = req.headers_in['Referer'] if replace_ampersands == 1: return referer.replace('&', '&') return referer except KeyError: return '' def drop_default_urlargd(urlargd, default_urlargd): lndefault = {} lndefault.update(default_urlargd) ## Commented out. An Invenio URL now should always specify the desired ## language, in order not to raise the automatic language discovery ## (client browser language can be used now in place of CFG_SITE_LANG) # lndefault['ln'] = (str, CFG_SITE_LANG) canonical = {} canonical.update(urlargd) for k, v in urlargd.items(): try: d = lndefault[k] if d[1] == v: del canonical[k] except KeyError: pass return canonical def make_canonical_urlargd(urlargd, default_urlargd): """ Build up the query part of an URL from the arguments passed in the 'urlargd' dictionary. 'default_urlargd' is a secondary dictionary which contains tuples of the form (type, default value) for the query arguments (this is the same dictionary as the one you can pass to webinterface_handler.wash_urlargd). When a query element has its default value, it is discarded, so that the simplest (canonical) url query is returned. The result contains the initial '?' if there are actual query items remaining. """ canonical = drop_default_urlargd(urlargd, default_urlargd) if canonical: return '?' + urlencode(canonical, doseq=True).replace('&', '&') return '' def create_html_link(urlbase, urlargd, link_label, linkattrd=None, escape_urlargd=True, escape_linkattrd=True): """Creates a W3C compliant link. @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search) @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of'='hb'}) @param link_label: text displayed in a browser (has to be already escaped) @param linkattrd: dictionary of attributes (e.g. a={'class': 'img'}) @param escape_urlargd: boolean indicating if the function should escape arguments (e.g. < becomes < or " becomes ") @param escape_linkattrd: boolean indicating if the function should escape attributes (e.g. < becomes < or " becomes ") """ attributes_separator = ' ' output = '' return output def create_html_mailto(email, subject=None, body=None, cc=None, bcc=None, link_label="%(email)s", linkattrd=None, escape_urlargd=True, escape_linkattrd=True, email_obfuscation_mode=CFG_WEBSTYLE_EMAIL_ADDRESSES_OBFUSCATION_MODE): """Creates a W3C compliant 'mailto' link. Encode/encrypt given email to reduce undesired automated email harvesting when embedded in a web page. NOTE: there is no ultimate solution to protect against email harvesting. All have drawbacks and can more or less be circumvented. There are other techniques to protect email adresses. We implement the less annoying one for users. @param email: the recipient of the email @param subject: a default subject for the email (must not contain line feeds) @param body: a default body for the email @param cc: the co-recipient(s) of the email @param bcc: the hidden co-recpient(s) of the email @param link_label: the label of this mailto link. 
String replacement is performed on key %(email)s with the email address if needed. @param linkattrd: dictionary of attributes (e.g. a={'class': 'img'}) @param escape_urlargd: boolean indicating if the function should escape arguments (e.g. < becomes < or " becomes ") @param escape_linkattrd: boolean indicating if the function should escape attributes (e.g. < becomes < or " becomes ") @param email_obfuscation_mode: the protection mode. See below: You can choose among several modes to protect emails. It is advised to keep the default CFG_MISCUTIL_EMAIL_HARVESTING_PROTECTION value, so that it is possible for an admin to change the policy globally. Available modes ([t] means "transparent" for the user): -1: hide all emails, excepted CFG_SITE_ADMIN_EMAIL and CFG_SITE_SUPPORT_EMAIL. [t] 0 : no protection, email returned as is. foo@example.com => foo@example.com 1 : basic email munging: replaces @ by [at] and . by [dot] foo@example.com => foo [at] example [dot] com [t] 2 : transparent name mangling: characters are replaced by equivalent HTML entities. foo@example.com => foo@example.com [t] 3 : javascript insertion. Requires Javascript enabled on client side. 4 : replaces @ and . characters by gif equivalents. foo@example.com => foo [at] example [dot] com """ # TODO: implement other protection modes to encode/encript email: # ## [t] 5 : form submission. User is redirected to a form that he can ## fills in to send the email (??Use webmessage??). ## Depending on WebAccess, ask to answer a question. ## ## [t] 6 : if user can see (controlled by WebAccess), display. Else ## ask to login to see email. If user cannot see, display ## form submission. if linkattrd is None: linkattrd = {} parameters = {} if subject: parameters["subject"] = subject if body: parameters["body"] = body.replace('\r\n', '\n').replace('\n', '\r\n') if cc: parameters["cc"] = cc if bcc: parameters["bcc"] = bcc # Preprocessing values for some modes if email_obfuscation_mode == 1: # Basic Munging email = email.replace("@", " [at] ").replace(".", " [dot] ") elif email_obfuscation_mode == 2: # Transparent name mangling email = string_to_numeric_char_reference(email) if '%(email)s' in link_label: link_label = link_label % {'email': email} mailto_link = create_html_link('mailto:' + email, parameters, link_label, linkattrd, escape_urlargd, escape_linkattrd) if email_obfuscation_mode == 0: # Return "as is" return mailto_link elif email_obfuscation_mode == 1: # Basic Munging return mailto_link elif email_obfuscation_mode == 2: # Transparent name mangling return mailto_link elif email_obfuscation_mode == 3: # Javascript-based return '''''' % \ mailto_link[::-1].replace("'", "\\'") elif email_obfuscation_mode == 4: # GIFs-based email = email.replace('.', ' [dot] ' % CFG_SITE_URL) email = email.replace('@', ' [at] ' % CFG_SITE_URL) return email # All other cases, including mode -1: return "" def string_to_numeric_char_reference(string): """ Encode a string to HTML-compatible numeric character reference. Eg: encode_html_entities("abc") == 'abc' """ out = "" for char in string: out += "&#" + str(ord(char)) + ";" return out def create_url(urlbase, urlargd, escape_urlargd=True): """Creates a W3C compliant URL. Output will look like this: 'urlbase?param1=value1&param2=value2' @param urlbase: base url (e.g. invenio.config.CFG_SITE_URL/search) @param urlargd: dictionary of parameters. (e.g. p={'recid':3, 'of'='hb'} @param escape_urlargd: boolean indicating if the function should escape arguments (e.g. 
< becomes < or " becomes ") """ separator = '&' output = urlbase if urlargd: output += '?' if escape_urlargd: arguments = [escape(quote(str(key)), quote=True) + '=' + \ escape(quote(str(urlargd[key])), quote=True) for key in urlargd.keys()] else: arguments = [str(key) + '=' + str(urlargd[key]) for key in urlargd.keys()] output += separator.join(arguments) return output def same_urls_p(a, b): """ Compare two URLs, ignoring reorganizing of query arguments """ ua = list(urlparse(a)) ub = list(urlparse(b)) ua[4] = parse_qs(ua[4]) ub[4] = parse_qs(ub[4]) return ua == ub def urlargs_replace_text_in_arg(urlargs, regexp_argname, text_old, text_new): """Analyze `urlargs' (URL CGI GET query arguments in string form) and for each occurrence of argument matching `regexp_argname' replace every substring `text_old' by `text_new'. Return the resulting new URL. Used to be used for search engine's create_nearest_terms_box, now it is not used there anymore. It is left here in case it will become possibly useful later. """ out = "" # parse URL arguments into a dictionary: urlargsdict = parse_qs(urlargs) ## construct new URL arguments: urlargsdictnew = {} for key in urlargsdict.keys(): if re.match(regexp_argname, key): # replace `arg' by new values urlargsdictnew[key] = [] for parg in urlargsdict[key]: urlargsdictnew[key].append(parg.replace(text_old, text_new)) else: # keep old values urlargsdictnew[key] = urlargsdict[key] # build new URL for this word: for key in urlargsdictnew.keys(): for val in urlargsdictnew[key]: out += "&" + key + "=" + quote_plus(val, '') if out.startswith("&"): out = out[5:] return out def get_title_of_page(url): """ @param url: page to get the title from @return: the page title in utf-8 or None in case that any kind of exception occured e.g. connection error, URL not known """ if BEAUTIFUL_SOUP_IMPORTED: try: - soup = BeautifulSoup.BeautifulSoup(urllib.urlopen(url)) + opener = make_invenio_opener('UrlUtils') + soup = BeautifulSoup.BeautifulSoup(opener.open(url)) return soup.title.string.encode("utf-8") except: return None else: return "Title not available" +def make_user_agent_string(component=None): + """ + Return a nice and uniform user-agent string to be used when Invenio + act as a client in HTTP requests. + """ + ret = "Invenio-%s (+%s; \"%s\")" % (CFG_VERSION, CFG_SITE_URL, CFG_SITE_NAME) + if component: + ret += " %s" % component + return ret + +class InvenioFancyURLopener(FancyURLopener): + ## Provide default user agent string + version = make_user_agent_string() + def prompt_user_passwd(self, host, realm): + """Don't prompt""" + return None, None + +## Let's override default useragent string +## See: http://docs.python.org/release/2.4.4/lib/module-urllib.html +urllib._urlopener = InvenioFancyURLopener() + +def make_invenio_opener(component=None): + """ + Return an urllib2 opener with the useragent already set in the appropriate + way. + """ + opener = urllib2.build_opener() + opener.addheaders = [('User-agent', make_user_agent_string(component))] + return opener + def create_AWS_request_url(base_url, argd, _amazon_secret_access_key, _timestamp=None): """ Create a signed AWS (Amazon Web Service) request URL corresponding to the given parameters. 
Example: >> create_AWS_request_url("http://ecs.amazon.com/onca/xml", {'AWSAccessKeyID': '0000000000', 'Service': 'AWSECommerceService', 'Operation': 'ItemLookup', 'ItemID': '0679722769', 'ResponseGroup': 'ItemAttributes,Offers,Images,Review'}, "1234567890") @param base_url: Service URL of the Amazon store to query @param argd: dictionary of arguments defining the query @param _amazon_secret_access_key: your Amazon secret key @param _timestamp: for testing purpose only (default: current timestamp) @type base_url: string @type argd: dict @type _amazon_secret_access_key: string @type _timestamp: string @return signed URL of the request (string) """ ## First define a few util functions def get_AWS_signature(argd, _amazon_secret_access_key, method="GET", request_host="webservices.amazon.com", request_uri="/onca/xml", _timestamp=None): """ Returns the signature of an Amazon request, based on the arguments of the request. @param argd: dictionary of arguments defining the query @param _amazon_secret_access_key: your Amazon secret key @param method: method of the request POST or GET @param request_host: host contacted for the query. To embed in the signature. @param request_uri: uri contacted at 'request_host'. To embed in the signature. @param _timestamp: for testing purpose only (default: current timestamp) @type argd: dict @type _amazon_secret_access_key: string @type method: string @type host_header: string @type http_request_uri: string @type _timestamp: string @return signature of the request (string) """ # Add timestamp if not _timestamp: argd["Timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) else: argd["Timestamp"] = _timestamp # Order parameter keys by byte value parameter_keys = argd.keys() parameter_keys.sort() # Encode arguments, according to RFC 3986. Make sure we # generate a list which is ordered by byte value of the keys arguments = [quote(str(key), safe="~/") + "=" + \ quote(str(argd[key]), safe="~/") \ for key in parameter_keys] # Join parameters_string = "&".join(arguments) # Prefix parameters_string = method.upper() + "\n" + \ request_host.lower() + "\n" + \ (request_uri or "/") + "\n" + \ parameters_string # Sign and return return calculate_RFC2104_HMAC(parameters_string, _amazon_secret_access_key) def calculate_RFC2104_HMAC(data, _amazon_secret_access_key): """ Computes a RFC 2104 compliant HMAC Signature and then Base64 encodes it. Module hashlib must be installed if Python < 2.5 @param data: data to sign @param _amazon_secret_access_key: your Amazon secret key @type data: string @type _amazon_secret_access_key: string. Empty if hashlib module not installed """ if not HASHLIB_IMPORTED: try: raise Exception("Module hashlib not installed. Please install it.") except: from invenio.errorlib import register_exception register_exception(stream='warning', alert_admin=True, subject='Cannot create AWS signature') return "" else: if sys.version_info < (2, 5): # compatibility mode for Python < 2.5 and hashlib my_digest_algo = _MySHA256(sha256()) else: my_digest_algo = sha256 return base64.encodestring(hmac.new(_amazon_secret_access_key, data, my_digest_algo).digest()).strip() ## End util functions parsed_url = urlparse(base_url) signature = get_AWS_signature(argd, _amazon_secret_access_key, request_host=parsed_url[1], request_uri=parsed_url[2], _timestamp=_timestamp) if signature: argd["Signature"] = signature return base_url + "?" 
+ urlencode(argd) def create_Indico_request_url(base_url, indico_what, indico_loc, indico_id, indico_type, indico_params, indico_key, indico_sig, _timestamp=None): """ Create a signed Indico request URL to access Indico HTTP Export APIs. See U{http://indico.cern.ch/ihelp/html/ExportAPI/index.html} for more information. Example: >> create_Indico_request_url("https://indico.cern.ch", "categ", "", [1, 7], "xml", {'onlypublic': 'yes', 'order': 'title', 'from': 'today', 'to': 'tomorrow'}, '00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000000') @param base_url: Service base URL of the Indico instance to query @param indico_what: element to export @type indico_what: one of the strings: C{categ}, C{event}, C{room}, C{reservation} @param indico_loc: location of the element(s) specified by ID (only used for some elements) @param indico_id: ID of the element to be exported @type indico_id: a string or a list/tuple of strings @param indico_type: output format @type indico_type: one of the strings: C{json}, C{jsonp}, C{xml}, C{html}, C{ics}, C{atom} @param indico_params: parameters of the query. See U{http://indico.cern.ch/ihelp/html/ExportAPI/common.html} @param indico_key: API key provided for the given Indico instance @param indico_sig: API secret key (signature) provided for the given Indico instance @param _timestamp: for testing purpose only (default: current timestamp) @return signed URL of the request (string) """ url = '/export/' + indico_what + '/' if indico_loc: url += indico_loc + '/' if type(indico_id) in (list, tuple): # dash separated list of values indico_id = '-'.join([str(x) for x in indico_id]) url += indico_id + '.' + str(indico_type) if hasattr(indico_params, 'items'): items = indico_params.items() else: items = list(indico_params) if indico_key: items.append(('apikey', indico_key)) if indico_sig and HASHLIB_IMPORTED: if _timestamp: items.append(('timestamp', str(_timestamp))) else: items.append(('timestamp', str(int(time.time())))) items = sorted(items, key=lambda x: x[0].lower()) url_to_sign = '%s?%s' % (url, urlencode(items)) if sys.version_info < (2, 5): # compatibility mode for Python < 2.5 and hashlib my_digest_algo = _MySHA1(sha1()) else: my_digest_algo = sha1 signature = hmac.new(indico_sig, url_to_sign, my_digest_algo).hexdigest() items.append(('signature', signature)) elif not HASHLIB_IMPORTED: try: raise Exception("Module hashlib not installed. Please install it.") except: from invenio.errorlib import register_exception register_exception(stream='warning', alert_admin=True, subject='Cannot create AWS signature') if not items: return url url = '%s%s?%s' % (base_url.strip('/'), url, urlencode(items)) return url class _MyHashlibAlgo(object): ''' Define a subclass of any hashlib algorithm class, with an additional "new()" function, to work with the Python < 2.5 version of the hmac module. 
(This class is more complex than it should be, but it is not possible to subclass a hashlib algorithm directly.) '''
    def __init__(self, obj):
        """Set the wrapped object."""
        super(_MyHashlibAlgo, self).__setattr__('_obj', obj)
        methods = []
        for name_value in inspect.getmembers(obj, inspect.ismethod):
            methods.append(name_value[0])
        super(_MyHashlibAlgo, self).__setattr__('__methods__', methods)
        def isnotmethod(object_):
            "Opposite of ismethod(..)"
            return not inspect.ismethod(object_)
        members = []
        for name_value in inspect.getmembers(obj, isnotmethod):
            members.append(name_value[0])
        super(_MyHashlibAlgo, self).__setattr__('__members__', members)
    def __getattr__(self, name):
        """Redirect unhandled get attribute to self._obj."""
        if not hasattr(self._obj, name):
            raise AttributeError, ("'%s' has no attribute %s" % (self.__class__.__name__, name))
        else:
            return getattr(self._obj, name)
    def __setattr__(self, name, value):
        """Redirect set attribute to self._obj if necessary."""
        self_has_attr = True
        try:
            super(_MyHashlibAlgo, self).__getattribute__(name)
        except AttributeError:
            self_has_attr = False
        if (name == "_obj" or not hasattr(self, "_obj") or
            not hasattr(self._obj, name) or self_has_attr):
            return super(_MyHashlibAlgo, self).__setattr__(name, value)
        else:
            return setattr(self._obj, name, value)
class _MySHA256(_MyHashlibAlgo):
    "A _MyHashlibAlgo subclass for sha256"
    new = lambda d = '': sha256()
class _MySHA1(_MyHashlibAlgo):
    "A _MyHashlibAlgo subclass for sha1"
    new = lambda d = '': sha1()
def auto_version_url(file_path):
    """ Appends a hash of the file's content to the request URL so that the
    browser refreshes its cached copy whenever the file changes
    @param file_path: path to the file, e.g. js/foo.js
    @return: file_path with the content hash appended to the URL
    """
    return file_path + "?%s" % md5(open(CFG_WEBDIR + os.sep + file_path).read()).hexdigest()
diff --git a/modules/miscutil/lib/web_api_key.py b/modules/miscutil/lib/web_api_key.py
new file mode 100644
index 000000000..0ba4dd10b
--- /dev/null
+++ b/modules/miscutil/lib/web_api_key.py
@@ -0,0 +1,239 @@
+# -*- coding: utf-8 -*-
+##
+## This file is part of Invenio.
+## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN.
+##
+## Invenio is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## Invenio is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Invenio; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
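(For orientation: a minimal usage sketch for the make_invenio_opener() helper added to urlutils.py above. The component name 'mymodule' and the URL are hypothetical placeholders, not part of the patch.)

    ## Sketch: fetch a page while sending the uniform Invenio user-agent
    ## string; 'mymodule' and the URL below are made-up examples.
    from invenio.urlutils import make_invenio_opener

    MYMODULE_OPENER = make_invenio_opener('mymodule')

    def fetch_page(url='http://www.example.org/'):
        """Open URL through the component-specific Invenio opener."""
        handle = MYMODULE_OPENER.open(url)
        try:
            return handle.read()
        finally:
            handle.close()

This mirrors the SOLRUTILS_OPENER pattern used in the solrutils hunk above: one opener per component, so remote server logs show which Invenio subsystem issued each request.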
+ +""" + Invenio utilities to perform a REST like authentication +""" +import sys +if sys.version_info < (2, 5): + import sha as sha1 +else: + from hashlib import sha1 +import hmac +import time +from cgi import parse_qsl +from urllib import urlencode +import re + +try: + from uuid import uuid4 +except ImportError: + import random + def uuid4(): + return "%x" % random.getrandbits(16*8) + +from invenio.dbquery import run_sql, IntegrityError +from invenio.config import CFG_WEB_API_KEY_ALLOWED_URL +from invenio.access_control_config import CFG_WEB_API_KEY_STATUS + + +_CFG_WEB_API_KEY_ALLOWED_URL = [(re.compile(_url), _authorized_time, _need_timestamp) + for _url, _authorized_time, _need_timestamp in CFG_WEB_API_KEY_ALLOWED_URL] + + +def create_new_web_api_key(uid, key_description=None): + """ + Creates a new pair REST API key / secret key for the user. To do that it + uses the uuid4 function. + + @param uid: User's id for the new REST API key + @type uid: int + @param key_description: User's description for the REST API key + @type key_description: string + """ + key_id = str(uuid4()) + key_secrect = str(uuid4()) + while True: + try: + run_sql("INSERT INTO webapikey (id,secret,id_user,description) VALUES(%s,%s,%s,%s)", + (key_id, key_secrect, uid, key_description)) + break + except IntegrityError: + key_id = str(uuid4()) + +def show_web_api_keys(uid, diff_status=CFG_WEB_API_KEY_STATUS['REMOVED']): + """ + Makes a query to the DB to obtain all the user's REST API keys + + @param uid: User's id + @type uid: int + @param diff_status: This string indicates if the query will show + all the REST API keys or only the ones that still active (usefull in the + admin part) + @type diff_statusparam: string + + @return: Tuples with the id, description and status of the user's REST API + keys + """ + keys_info = run_sql("SELECT id, description, status FROM webapikey WHERE id_user = %s AND status <> %s", + (uid, diff_status)) + return keys_info + +def mark_web_api_key_as_removed(key_id): + """ + When the user wants to remove one of his key, this functions puts the status + value of that key to remove, this way the user doesn't see the key anymore + but the admin user stills see it, make statistics whit it, etc. + + @param key_id: The id of the REST key that will be "removed" + @type key_id: string + """ + run_sql("UPDATE webapikey SET status=%s WHERE id=%s", (CFG_WEB_API_KEY_STATUS['REMOVED'], key_id, )) + +def get_available_web_api_keys(uid): + """ + Search for all the available REST keys, it means all the user's keys that are + not marked as REMOVED or REVOKED + + @param uid: The user id + @type uid: int + + @return: Tuples of REST API public keys + """ + keys = run_sql("SELECT id FROM webapikey WHERE id_user=%s AND status <> %s AND status <> %s", + (uid, CFG_WEB_API_KEY_STATUS['REMOVED'], CFG_WEB_API_KEY_STATUS['REVOKED'])) + return keys + +def acc_get_uid_from_request(path, args): + """ + Looks in the data base for the secret that matches with the API key in the + request. If the REST API key is found and if the signature is correct + returns the user's id. + + @param path: uri of the request until the "?" 
+    (i.e.: req.uri)
+    @type path: string
+    @param args: All the params of the request (i.e.: req.args)
+    @type args: string
+
+    @return: If everything goes well it returns the user's uid, if not -1
+    """
+    from invenio.webstat import register_customevent
+
+    params = parse_qsl(args)
+    api_key = signature = timestamp = None
+
+    for param in params[:]:  # iterate over a copy, the list is modified below
+        if param[0] == 'apikey':
+            api_key = param[1]
+        elif param[0] == 'signature':
+            signature = param[1]
+            params.remove(param)  # get rid of the signature
+        elif param[0] == 'timestamp':
+            timestamp = param[1]
+    # Check that the url is well built
+    if api_key is None or signature is None:
+        return -1
+
+    url_req = "%s?%s" % (path, urlencode(params))
+
+    authorized_time = None
+    need_timestamp = False
+    for url, authorized_time, need_timestamp in _CFG_WEB_API_KEY_ALLOWED_URL:
+        if url.match(url_req) is not None:
+            break
+
+    if need_timestamp and timestamp is None:
+        return -1
+
+    if authorized_time is None:
+        return -1
+
+    if authorized_time != 0 and need_timestamp:
+        time_lapse = time.time() - float(timestamp)
+        if time_lapse > authorized_time or time_lapse < 0:
+            return -1
+
+    key = run_sql("SELECT id_user, secret FROM webapikey WHERE id=%s AND status <> %s AND status <> %s",
+                  (api_key, CFG_WEB_API_KEY_STATUS['REMOVED'], CFG_WEB_API_KEY_STATUS['REVOKED']))
+    if not key:
+        return -1
+    else:
+        uid = key[0][0]
+        secret_key = key[0][1]
+        server_signature = hmac.new(secret_key, url_req, sha1).hexdigest()
+        if signature == server_signature:
+            # If the signature is fine, log the key activity and return the UID
+            register_customevent("apikeyusage", [uid, api_key, path, url_req])
+            return uid
+        else:
+            return -1
+
+def build_web_request(path, params, uid=-1, api_key=None, timestamp=True):
+    """
+    Build a new request that uses REST authentication.
+    1. Add your REST API key to the params
+    2. Add the current timestamp to the params, if needed
+    3. Sort the query string params
+    4. Merge path and the sorted query string to a single string
+    5. Create an HMAC-SHA1 signature of this string using your secret key as
+       the key
+    6. Append the hex-encoded signature to your query string
+
+    @note: If the api_key parameter is None, then this method performs a
+        search in the database using the uid parameter to get one of the
+        user's REST API keys. If the user has one or more usable REST API
+        keys, this method uses the first one found.
+
+    @param path: uri of the request until the "?"
(i.e.: /search) + @type path: string + @param params: All the params of the request (i.e.: req.args or a dictionary + with the param name as key) + @type params: string or dict + @param api_key: User REST API key + @type api_key: string + @param uid: User's id to do the search for the REST API key + @type uid: int + @param timestamp: Indicates if timestamp is needed in the request + @type timestamp: boolean + + @return: Signed request string or, in case of error, '' + """ + if not isinstance(params, dict): + if len(params) != 0 and params[0] == '?': + params = params.replace('?','') + params = parse_qsl(params) + + items = (hasattr(params, 'items') and [params.items()] or [list(params)])[0] + + if api_key: + items.append(('apikey', api_key)) + elif uid > 0: + keys = run_sql("SELECT id FROM webapikey WHERE id_user=%s AND status <> %s AND status <> %s", + (uid, CFG_WEB_API_KEY_STATUS['REMOVED'], CFG_WEB_API_KEY_STATUS['REVOKED'])) + if keys is not None and len(keys) != 0: + api_key = keys[0][0] + items.append(('apikey', api_key)) + else: + return '' + else: + return '' + + if timestamp: + items.append(('timestamp', str(int(time.time())))) + + items = sorted(items, key=lambda x: x[0].lower()) + url = '%s?%s' % (path, urlencode(items)) + + secret_key = run_sql("SELECT secret FROM webapikey WHERE id=%s", (api_key,)) + if len(secret_key) == 0 or not secret_key: + return '' + signature = hmac.new(secret_key[0][0], url, sha1).hexdigest() + items.append(('signature', signature)) + if not items: + return path + return '%s?%s' % (path, urlencode(items)) diff --git a/modules/miscutil/lib/web_api_key_tests.py b/modules/miscutil/lib/web_api_key_tests.py new file mode 100644 index 000000000..5b1944ddb --- /dev/null +++ b/modules/miscutil/lib/web_api_key_tests.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +## +## This file is part of Invenio. +## Copyright (C) 2006, 2007, 2008, 2010, 2011 CERN. +## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
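(For orientation: a sketch of the signing scheme implemented by build_web_request() and verified by acc_get_uid_from_request() above: add the API key, sort the parameters case-insensitively, HMAC-SHA1 the "path?query" string with the secret key, and append the hex-encoded signature. The key, secret, and parameters below are made-up illustration values.)

    ## Sketch of the request-signing scheme; all values are hypothetical.
    import hmac
    from hashlib import sha1
    from urllib import urlencode

    def sign_request(path, params, api_key, secret_key):
        # Sort parameters case-insensitively, API key included.
        items = sorted(params.items() + [('apikey', api_key)],
                       key=lambda x: x[0].lower())
        url = '%s?%s' % (path, urlencode(items))
        # HMAC-SHA1 over the full "path?query" string with the secret key.
        signature = hmac.new(secret_key, url, sha1).hexdigest()
        return '%s&signature=%s' % (url, signature)

    # e.g. sign_request('/search', {'p': 'ellis'}, '1234-abcd', '5678-efgh')

The server strips the signature parameter, recomputes the same HMAC over the remaining query string, and compares; any change to the path or parameters invalidates the signature.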
+from invenio import web_api_key + +"""Unit tests for REST like authentication API.""" + +try: + import hashlib +except: + pass +import unittest +import re +import hmac +import urllib +import time +import string + +from invenio.testutils import make_test_suite, run_test_suite +from invenio.dbquery import run_sql + +web_api_key.CFG_WEB_API_KEY_ALLOWED_URL = [('/search\?*', 0, True), + ('/bad\?*', -1, True)] #Just for testing + +web_api_key._CFG_WEB_API_KEY_ALLOWED_URL = [(re.compile(_url), _authorized_time, _need_timestamp) + for _url, _authorized_time, _need_timestamp in web_api_key.CFG_WEB_API_KEY_ALLOWED_URL] + +def build_web_request(path, params, api_key=None, secret_key=None): + items = (hasattr(params, 'items') and [params.items()] or [list(params)])[0] + if api_key: + items.append(('apikey', api_key)) + if secret_key: + items.append(('timestamp', str(int(time.time())))) + items = sorted(items, key=lambda x: x[0].lower()) + url = '%s?%s' % (path, urllib.urlencode(items)) + signature = hmac.new(secret_key, url, hashlib.sha1).hexdigest() + items.append(('signature', signature)) + if not items: + return path + return '%s?%s' % (path, urllib.urlencode(items)) + +class APIKeyTest(unittest.TestCase): + """ Test functions related to the REST authentication API """ + def setUp(self): + self.id_admin = run_sql('SELECT id FROM user WHERE nickname="admin"')[0][0] + + def test_create_remove_show_key(self): + """apikey - create/list/delete REST key""" + self.assertEqual(0, len(web_api_key.show_web_api_keys(uid=self.id_admin))) + web_api_key.create_new_web_api_key(self.id_admin, "Test key I") + web_api_key.create_new_web_api_key(self.id_admin, "Test key II") + web_api_key.create_new_web_api_key(self.id_admin, "Test key III") + web_api_key.create_new_web_api_key(self.id_admin, "Test key IV") + web_api_key.create_new_web_api_key(self.id_admin, "Test key V") + self.assertEqual(5, len(web_api_key.show_web_api_keys(uid=self.id_admin))) + self.assertEqual(5, len(web_api_key.show_web_api_keys(uid=self.id_admin, diff_status=''))) + keys_info = web_api_key.show_web_api_keys(uid=self.id_admin) + web_api_key.mark_web_api_key_as_removed(keys_info[0][0]) + self.assertEqual(4, len(web_api_key.show_web_api_keys(uid=self.id_admin))) + self.assertEqual(5, len(web_api_key.show_web_api_keys(uid=self.id_admin,diff_status=''))) + + run_sql("UPDATE webapikey SET status='WARNING' WHERE id=%s", (keys_info[1][0],)) + run_sql("UPDATE webapikey SET status='REVOKED' WHERE id=%s", (keys_info[2][0],)) + + self.assertEqual(4, len(web_api_key.show_web_api_keys(uid=self.id_admin))) + self.assertEqual(5, len(web_api_key.show_web_api_keys(uid=self.id_admin, diff_status=''))) + + run_sql("DELETE FROM webapikey") + + def test_acc_get_uid_from_request(self): + """webapikey - Login user from request using REST key""" + path = '/search' + params = 'ln=es&sc=1&c=Articles & Preprints&action_search=Buscar&p=ellis' + + self.assertEqual(0, len(web_api_key.show_web_api_keys(uid=self.id_admin))) + web_api_key.create_new_web_api_key(self.id_admin, "Test key I") + + key_info = run_sql("SELECT id FROM webapikey WHERE id_user=%s", (self.id_admin,)) + url = web_api_key.build_web_request(path, params, api_key=key_info[0][0]) + url = string.split(url, '?') + uid = web_api_key.acc_get_uid_from_request(url[0], url[1]) + self.assertEqual(uid, self.id_admin) + + url = web_api_key.build_web_request(path, params, api_key=key_info[0][0]) + url += "123" # corrupt the key + url = string.split(url, '?') + uid = web_api_key.acc_get_uid_from_request(url[0], 
url[1]) + self.assertEqual(uid, -1) + + path = '/bad' + uid = web_api_key.acc_get_uid_from_request(path, "") + self.assertEqual(uid, -1) + params = { 'nocache': 'yes', 'limit': 123 } + url = web_api_key.build_web_request(path, params, api_key=key_info[0][0]) + url = string.split(url, '?') + uid = web_api_key.acc_get_uid_from_request(url[0], url[1]) + self.assertEqual(uid, -1) + + run_sql("DELETE FROM webapikey") + +TEST_SUITE = make_test_suite(APIKeyTest) + +if __name__ == "__main__": + run_test_suite(TEST_SUITE) + run_sql("DELETE FROM webapikey") \ No newline at end of file diff --git a/modules/miscutil/sql/tabbibclean.sql b/modules/miscutil/sql/tabbibclean.sql index e3321df9a..5ed7a3e5e 100644 --- a/modules/miscutil/sql/tabbibclean.sql +++ b/modules/miscutil/sql/tabbibclean.sql @@ -1,341 +1,340 @@ TRUNCATE bibrec; TRUNCATE bib00x; TRUNCATE bib01x; TRUNCATE bib02x; TRUNCATE bib03x; TRUNCATE bib04x; TRUNCATE bib05x; TRUNCATE bib06x; TRUNCATE bib07x; TRUNCATE bib08x; TRUNCATE bib09x; TRUNCATE bib10x; TRUNCATE bib11x; TRUNCATE bib12x; TRUNCATE bib13x; TRUNCATE bib14x; TRUNCATE bib15x; TRUNCATE bib16x; TRUNCATE bib17x; TRUNCATE bib18x; TRUNCATE bib19x; TRUNCATE bib20x; TRUNCATE bib21x; TRUNCATE bib22x; TRUNCATE bib23x; TRUNCATE bib24x; TRUNCATE bib25x; TRUNCATE bib26x; TRUNCATE bib27x; TRUNCATE bib28x; TRUNCATE bib29x; TRUNCATE bib30x; TRUNCATE bib31x; TRUNCATE bib32x; TRUNCATE bib33x; TRUNCATE bib34x; TRUNCATE bib35x; TRUNCATE bib36x; TRUNCATE bib37x; TRUNCATE bib38x; TRUNCATE bib39x; TRUNCATE bib40x; TRUNCATE bib41x; TRUNCATE bib42x; TRUNCATE bib43x; TRUNCATE bib44x; TRUNCATE bib45x; TRUNCATE bib46x; TRUNCATE bib47x; TRUNCATE bib48x; TRUNCATE bib49x; TRUNCATE bib50x; TRUNCATE bib51x; TRUNCATE bib52x; TRUNCATE bib53x; TRUNCATE bib54x; TRUNCATE bib55x; TRUNCATE bib56x; TRUNCATE bib57x; TRUNCATE bib58x; TRUNCATE bib59x; TRUNCATE bib60x; TRUNCATE bib61x; TRUNCATE bib62x; TRUNCATE bib63x; TRUNCATE bib64x; TRUNCATE bib65x; TRUNCATE bib66x; TRUNCATE bib67x; TRUNCATE bib68x; TRUNCATE bib69x; TRUNCATE bib70x; TRUNCATE bib71x; TRUNCATE bib72x; TRUNCATE bib73x; TRUNCATE bib74x; TRUNCATE bib75x; TRUNCATE bib76x; TRUNCATE bib77x; TRUNCATE bib78x; TRUNCATE bib79x; TRUNCATE bib80x; TRUNCATE bib81x; TRUNCATE bib82x; TRUNCATE bib83x; TRUNCATE bib84x; TRUNCATE bib85x; TRUNCATE bib86x; TRUNCATE bib87x; TRUNCATE bib88x; TRUNCATE bib89x; TRUNCATE bib90x; TRUNCATE bib91x; TRUNCATE bib92x; TRUNCATE bib93x; TRUNCATE bib94x; TRUNCATE bib95x; TRUNCATE bib96x; TRUNCATE bib97x; TRUNCATE bib98x; TRUNCATE bib99x; TRUNCATE bibrec_bib00x; TRUNCATE bibrec_bib01x; TRUNCATE bibrec_bib02x; TRUNCATE bibrec_bib03x; TRUNCATE bibrec_bib04x; TRUNCATE bibrec_bib05x; TRUNCATE bibrec_bib06x; TRUNCATE bibrec_bib07x; TRUNCATE bibrec_bib08x; TRUNCATE bibrec_bib09x; TRUNCATE bibrec_bib10x; TRUNCATE bibrec_bib11x; TRUNCATE bibrec_bib12x; TRUNCATE bibrec_bib13x; TRUNCATE bibrec_bib14x; TRUNCATE bibrec_bib15x; TRUNCATE bibrec_bib16x; TRUNCATE bibrec_bib17x; TRUNCATE bibrec_bib18x; TRUNCATE bibrec_bib19x; TRUNCATE bibrec_bib20x; TRUNCATE bibrec_bib21x; TRUNCATE bibrec_bib22x; TRUNCATE bibrec_bib23x; TRUNCATE bibrec_bib24x; TRUNCATE bibrec_bib25x; TRUNCATE bibrec_bib26x; TRUNCATE bibrec_bib27x; TRUNCATE bibrec_bib28x; TRUNCATE bibrec_bib29x; TRUNCATE bibrec_bib30x; TRUNCATE bibrec_bib31x; TRUNCATE bibrec_bib32x; TRUNCATE bibrec_bib33x; TRUNCATE bibrec_bib34x; TRUNCATE bibrec_bib35x; TRUNCATE bibrec_bib36x; TRUNCATE bibrec_bib37x; TRUNCATE bibrec_bib38x; TRUNCATE bibrec_bib39x; TRUNCATE bibrec_bib40x; TRUNCATE bibrec_bib41x; TRUNCATE 
bibrec_bib42x; TRUNCATE bibrec_bib43x; TRUNCATE bibrec_bib44x; TRUNCATE bibrec_bib45x; TRUNCATE bibrec_bib46x; TRUNCATE bibrec_bib47x; TRUNCATE bibrec_bib48x; TRUNCATE bibrec_bib49x; TRUNCATE bibrec_bib50x; TRUNCATE bibrec_bib51x; TRUNCATE bibrec_bib52x; TRUNCATE bibrec_bib53x; TRUNCATE bibrec_bib54x; TRUNCATE bibrec_bib55x; TRUNCATE bibrec_bib56x; TRUNCATE bibrec_bib57x; TRUNCATE bibrec_bib58x; TRUNCATE bibrec_bib59x; TRUNCATE bibrec_bib60x; TRUNCATE bibrec_bib61x; TRUNCATE bibrec_bib62x; TRUNCATE bibrec_bib63x; TRUNCATE bibrec_bib64x; TRUNCATE bibrec_bib65x; TRUNCATE bibrec_bib66x; TRUNCATE bibrec_bib67x; TRUNCATE bibrec_bib68x; TRUNCATE bibrec_bib69x; TRUNCATE bibrec_bib70x; TRUNCATE bibrec_bib71x; TRUNCATE bibrec_bib72x; TRUNCATE bibrec_bib73x; TRUNCATE bibrec_bib74x; TRUNCATE bibrec_bib75x; TRUNCATE bibrec_bib76x; TRUNCATE bibrec_bib77x; TRUNCATE bibrec_bib78x; TRUNCATE bibrec_bib79x; TRUNCATE bibrec_bib80x; TRUNCATE bibrec_bib81x; TRUNCATE bibrec_bib82x; TRUNCATE bibrec_bib83x; TRUNCATE bibrec_bib84x; TRUNCATE bibrec_bib85x; TRUNCATE bibrec_bib86x; TRUNCATE bibrec_bib87x; TRUNCATE bibrec_bib88x; TRUNCATE bibrec_bib89x; TRUNCATE bibrec_bib90x; TRUNCATE bibrec_bib91x; TRUNCATE bibrec_bib92x; TRUNCATE bibrec_bib93x; TRUNCATE bibrec_bib94x; TRUNCATE bibrec_bib95x; TRUNCATE bibrec_bib96x; TRUNCATE bibrec_bib97x; TRUNCATE bibrec_bib98x; TRUNCATE bibrec_bib99x; TRUNCATE bibfmt; TRUNCATE idxWORD01F; TRUNCATE idxWORD02F; TRUNCATE idxWORD03F; TRUNCATE idxWORD04F; TRUNCATE idxWORD05F; TRUNCATE idxWORD06F; TRUNCATE idxWORD07F; TRUNCATE idxWORD08F; TRUNCATE idxWORD09F; TRUNCATE idxWORD10F; TRUNCATE idxWORD11F; TRUNCATE idxWORD12F; TRUNCATE idxWORD13F; TRUNCATE idxWORD14F; TRUNCATE idxWORD15F; TRUNCATE idxWORD16F; TRUNCATE idxWORD17F; TRUNCATE idxWORD18F; TRUNCATE idxWORD01R; TRUNCATE idxWORD02R; TRUNCATE idxWORD03R; TRUNCATE idxWORD04R; TRUNCATE idxWORD05R; TRUNCATE idxWORD06R; TRUNCATE idxWORD07R; TRUNCATE idxWORD08R; TRUNCATE idxWORD09R; TRUNCATE idxWORD10R; TRUNCATE idxWORD11R; TRUNCATE idxWORD12R; TRUNCATE idxWORD13R; TRUNCATE idxWORD14R; TRUNCATE idxWORD15R; TRUNCATE idxWORD16R; TRUNCATE idxWORD17R; TRUNCATE idxWORD18R; TRUNCATE idxPAIR01F; TRUNCATE idxPAIR02F; TRUNCATE idxPAIR03F; TRUNCATE idxPAIR04F; TRUNCATE idxPAIR05F; TRUNCATE idxPAIR06F; TRUNCATE idxPAIR07F; TRUNCATE idxPAIR08F; TRUNCATE idxPAIR09F; TRUNCATE idxPAIR10F; TRUNCATE idxPAIR11F; TRUNCATE idxPAIR12F; TRUNCATE idxPAIR13F; TRUNCATE idxPAIR14F; TRUNCATE idxPAIR15F; TRUNCATE idxPAIR16F; TRUNCATE idxPAIR17F; TRUNCATE idxPAIR18F; TRUNCATE idxPAIR01R; TRUNCATE idxPAIR02R; TRUNCATE idxPAIR03R; TRUNCATE idxPAIR04R; TRUNCATE idxPAIR05R; TRUNCATE idxPAIR06R; TRUNCATE idxPAIR07R; TRUNCATE idxPAIR08R; TRUNCATE idxPAIR09R; TRUNCATE idxPAIR10R; TRUNCATE idxPAIR11R; TRUNCATE idxPAIR12R; TRUNCATE idxPAIR13R; TRUNCATE idxPAIR14R; TRUNCATE idxPAIR15R; TRUNCATE idxPAIR16R; TRUNCATE idxPAIR17R; TRUNCATE idxPAIR18R; TRUNCATE idxPHRASE01F; TRUNCATE idxPHRASE02F; TRUNCATE idxPHRASE03F; TRUNCATE idxPHRASE04F; TRUNCATE idxPHRASE05F; TRUNCATE idxPHRASE06F; TRUNCATE idxPHRASE07F; TRUNCATE idxPHRASE08F; TRUNCATE idxPHRASE09F; TRUNCATE idxPHRASE10F; TRUNCATE idxPHRASE11F; TRUNCATE idxPHRASE12F; TRUNCATE idxPHRASE13F; TRUNCATE idxPHRASE14F; TRUNCATE idxPHRASE15F; TRUNCATE idxPHRASE16F; TRUNCATE idxPHRASE17F; TRUNCATE idxPHRASE18F; TRUNCATE idxPHRASE01R; TRUNCATE idxPHRASE02R; TRUNCATE idxPHRASE03R; TRUNCATE idxPHRASE04R; TRUNCATE idxPHRASE05R; TRUNCATE idxPHRASE06R; TRUNCATE idxPHRASE07R; TRUNCATE idxPHRASE08R; TRUNCATE idxPHRASE09R; TRUNCATE 
idxPHRASE10R; TRUNCATE idxPHRASE11R; TRUNCATE idxPHRASE12R; TRUNCATE idxPHRASE13R; TRUNCATE idxPHRASE14R; TRUNCATE idxPHRASE15R; TRUNCATE idxPHRASE16R; TRUNCATE idxPHRASE17R; TRUNCATE idxPHRASE18R; TRUNCATE rnkMETHODDATA; TRUNCATE rnkCITATIONDATA; TRUNCATE rnkDOWNLOADS; TRUNCATE rnkPAGEVIEWS; TRUNCATE rnkWORD01F; TRUNCATE rnkWORD01R; TRUNCATE bibdoc; TRUNCATE bibrec_bibdoc; TRUNCATE bibdoc_bibdoc; TRUNCATE bibdocfsinfo; TRUNCATE sbmAPPROVAL; TRUNCATE sbmSUBMISSIONS; TRUNCATE sbmPUBLICATION; TRUNCATE sbmPUBLICATIONCOMM; TRUNCATE sbmPUBLICATIONDATA; TRUNCATE hstRECORD; TRUNCATE hstDOCUMENT; TRUNCATE bibHOLDINGPEN; TRUNCATE hstEXCEPTION; TRUNCATE aidPERSONIDDATA; TRUNCATE aidRESULTS; -TRUNCATE aidPROBCACHE; TRUNCATE aidCACHE; TRUNCATE aidPERSONIDPAPERS; TRUNCATE aidUSERINPUTLOG; TRUNCATE lnkENTRY; TRUNCATE lnkENTRYURLTITLE; TRUNCATE lnkENTRYLOG; TRUNCATE lnkLOG; TRUNCATE lnkADMINURL; TRUNCATE lnkADMINURLLOG; diff --git a/modules/miscutil/sql/tabcreate.sql b/modules/miscutil/sql/tabcreate.sql index ed89f1abd..9b2333402 100644 --- a/modules/miscutil/sql/tabcreate.sql +++ b/modules/miscutil/sql/tabcreate.sql @@ -1,4163 +1,4180 @@ -- This file is part of Invenio. -- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. -- -- Invenio is free software; you can redistribute it and/or -- modify it under the terms of the GNU General Public License as -- published by the Free Software Foundation; either version 2 of the -- License, or (at your option) any later version. -- -- Invenio is distributed in the hope that it will be useful, but -- WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Invenio; if not, write to the Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
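(For orientation: the schema below stores each MARC field value in one of a hundred bibNNx tables, selected by the first two digits of the MARC tag, with matching bibrec_bibNNx tables linking records to values. A hypothetical helper, not part of the patch, illustrating the naming convention:)

    ## Illustrative only: map a MARC tag to the bibNNx value table and
    ## the bibrec_bibNNx link table that the schema below defines.
    def bibxxx_tables(tag):
        """E.g. tag '100__a' -> ('bib10x', 'bibrec_bib10x')."""
        prefix = tag[0:2]
        return ('bib%sx' % prefix, 'bibrec_bib%sx' % prefix)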
-- tables for bibliographic records: CREATE TABLE IF NOT EXISTS bibrec ( id mediumint(8) unsigned NOT NULL auto_increment, creation_date datetime NOT NULL default '0000-00-00', modification_date datetime NOT NULL default '0000-00-00', PRIMARY KEY (id), KEY creation_date (creation_date), KEY modification_date (modification_date) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib00x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib01x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib02x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib03x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib04x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib05x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib06x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib07x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib08x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib09x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib10x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib11x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib12x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib13x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib14x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib15x ( id 
mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib16x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib17x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib18x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib19x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib20x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib21x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib22x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib23x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib24x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib25x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib26x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib27x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib28x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib29x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib30x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib31x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE 
TABLE IF NOT EXISTS bib32x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib33x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib34x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib35x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib36x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib37x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib38x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib39x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib40x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib41x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib42x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib43x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib44x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib45x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib46x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib47x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib48x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv 
(value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib49x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib50x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib51x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib52x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib53x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib54x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib55x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib56x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib57x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib58x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib59x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib60x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib61x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib62x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib63x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib64x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib65x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, 
PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib66x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib67x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib68x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib69x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib70x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib71x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib72x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib73x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib74x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib75x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib76x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib77x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib78x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib79x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib80x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib81x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bib82x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL 
CREATE TABLE IF NOT EXISTS bib83x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib84x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
-- URLs usually need a larger index for speedy lookups:
CREATE TABLE IF NOT EXISTS bib85x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(100)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib86x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib87x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib88x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib89x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib90x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib91x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib92x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib93x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib94x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib95x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib96x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib97x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib98x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bib99x ( id mediumint(8) unsigned NOT NULL auto_increment, tag varchar(6) NOT NULL default '', value text NOT NULL, PRIMARY KEY (id), KEY kt (tag), KEY kv (value(35)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib00x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib01x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib02x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib03x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib04x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib05x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib06x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib07x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib08x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib09x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib10x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib11x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib12x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib13x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib14x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib15x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib16x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib17x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib18x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib19x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib20x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib21x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib22x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib23x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib24x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib25x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib26x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib27x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib28x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib29x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib30x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib31x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib32x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib33x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib34x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib35x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib36x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib37x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib38x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib39x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib40x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib41x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib42x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib43x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib44x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib45x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib46x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib47x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib48x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib49x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib50x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib51x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib52x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib53x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib54x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib55x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib56x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib57x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib58x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib59x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib60x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib61x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib62x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib63x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib64x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib65x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib66x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib67x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib68x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib69x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib70x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib71x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib72x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib73x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib74x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib75x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib76x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib77x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib78x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib79x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib80x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib81x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib82x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib83x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib84x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib85x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib86x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib87x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib88x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib89x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib90x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib91x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib92x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib93x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib94x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib95x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib96x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib97x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib98x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS bibrec_bib99x ( id_bibrec mediumint(8) unsigned NOT NULL default '0', id_bibxxx mediumint(8) unsigned NOT NULL default '0', field_number smallint(5) unsigned default NULL, KEY id_bibxxx (id_bibxxx), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
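-- The bibNNx tables above store the distinct (tag, value) pairs of the MARC
-- metadata, partitioned by the first two digits of the tag (so bib24x covers
-- tags 240-249), and the bibrec_bibNNx tables map them to records, with
-- field_number keeping repeated field instances in order. A sketch of looking
-- up a record's title subfield (record ID 10 and tag '245__a' are
-- hypothetical examples):
--
--   SELECT b.value
--     FROM bib24x AS b
--     JOIN bibrec_bib24x AS bb ON bb.id_bibxxx = b.id
--    WHERE bb.id_bibrec = 10 AND b.tag = '245__a'
--    ORDER BY bb.field_number;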
-- tables for bibliographic records formatted:
CREATE TABLE IF NOT EXISTS bibfmt ( id mediumint(8) unsigned NOT NULL auto_increment, id_bibrec int(8) unsigned NOT NULL default '0', format varchar(10) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00', value longblob, PRIMARY KEY (id), KEY id_bibrec (id_bibrec), KEY format (format) ) ENGINE=MyISAM;
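-- bibfmt caches pre-formatted record output keyed by record ID and format
-- code, so display pages need not re-run the formatter on every hit. A sketch
-- of fetching a record's cached brief HTML (record ID 10 is a hypothetical
-- example; 'hb' is the conventional brief-format code, and the blob may be
-- stored compressed by the application layer):
--
--   SELECT value FROM bibfmt WHERE id_bibrec = 10 AND format = 'hb';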
-- tables for index files:
CREATE TABLE IF NOT EXISTS idxINDEX ( id mediumint(9) unsigned NOT NULL, name varchar(50) NOT NULL default '', description varchar(255) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', stemming_language varchar(10) NOT NULL default '', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxINDEXNAME ( id_idxINDEX mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_idxINDEX,ln,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxINDEX_field ( id_idxINDEX mediumint(9) unsigned NOT NULL, id_field mediumint(9) unsigned NOT NULL, regexp_punctuation varchar(255) NOT NULL default "[\.\,\:\;\?\!\"]", regexp_alphanumeric_separators varchar(255) NOT NULL default "[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]", PRIMARY KEY (id_idxINDEX,id_field) ) ENGINE=MyISAM;
-- this comment line here is just to fix the SQL display mode in Emacs '
CREATE TABLE IF NOT EXISTS idxWORD01F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD02F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD02R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD03F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD03R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD04F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD04R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD05F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD05R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD06F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD06R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD07F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD07R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD08F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD08R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD09F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD09R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD10F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD10R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD11F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD11R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD12F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD12R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD13F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD13R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD14F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD14R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD15F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD15R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD16F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD16R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD17F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD17R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD18F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxWORD18R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR01F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR02F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR02R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR03F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR03R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR04F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR04R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR05F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR05R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR06F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR06R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR07F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR07R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR08F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR08R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR09F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR09R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR10F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR10R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR11F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR11R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR12F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR12R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR13F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR13R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR14F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR14R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR15F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR15R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR16F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR16R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR17F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR17R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR18F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(100) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPAIR18R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE01F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE02F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE02R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE03F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE03R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE04F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE04R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE05F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE05R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE06F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE06R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE07F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE07R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE08F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE08R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE09F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE09R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE10F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE10R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE11F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE11R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE12F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE12R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE13F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE13R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE14F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE14R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE15F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE15R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE16F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE16R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE17F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE17R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE18F ( id mediumint(9) unsigned NOT NULL auto_increment, term text default NULL, hitlist longblob, PRIMARY KEY (id), KEY term (term(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS idxPHRASE18R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
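-- Every index above comes in a forward flavour (term -> hitlist, a serialized
-- set of record IDs) and a reverse flavour (record -> termlist), with the
-- CURRENT/FUTURE/TEMPORARY generations letting the indexer build a new
-- version alongside the live one. A sketch of both lookups (the term 'ellis'
-- and record ID 10 are hypothetical examples; the blobs are decoded by the
-- application):
--
--   SELECT hitlist  FROM idxWORD01F WHERE term = 'ellis';
--   SELECT termlist FROM idxWORD01R WHERE id_bibrec = 10 AND type = 'CURRENT';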
-- tables for ranking:
CREATE TABLE IF NOT EXISTS rnkMETHOD ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(20) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkMETHODNAME ( id_rnkMETHOD mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_rnkMETHOD,ln,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkMETHODDATA ( id_rnkMETHOD mediumint(9) unsigned NOT NULL, relevance_data longblob, PRIMARY KEY (id_rnkMETHOD) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS collection_rnkMETHOD ( id_collection mediumint(9) unsigned NOT NULL, id_rnkMETHOD mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_rnkMETHOD) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkWORD01F ( id mediumint(9) unsigned NOT NULL auto_increment, term varchar(50) default NULL, hitlist longblob, PRIMARY KEY (id), UNIQUE KEY term (term) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkWORD01R ( id_bibrec mediumint(9) unsigned NOT NULL, termlist longblob, type enum('CURRENT','FUTURE','TEMPORARY') NOT NULL default 'CURRENT', PRIMARY KEY (id_bibrec,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkAUTHORDATA ( aterm varchar(50) default NULL, hitlist longblob, UNIQUE KEY aterm (aterm) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkPAGEVIEWS ( id_bibrec mediumint(8) unsigned default NULL, id_user int(15) unsigned default '0', client_host int(10) unsigned default NULL, view_time datetime default '0000-00-00 00:00:00', KEY view_time (view_time), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS rnkDOWNLOADS ( id_bibrec mediumint(8) unsigned default NULL, download_time datetime default '0000-00-00 00:00:00', client_host int(10) unsigned default NULL, id_user int(15) unsigned default NULL, id_bibdoc mediumint(9) unsigned default NULL, file_version smallint(2) unsigned default NULL, file_format varchar(10) NULL default NULL, KEY download_time (download_time), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM;
-- a table for citations: record-cites-record
CREATE TABLE IF NOT EXISTS rnkCITATIONDATA ( id mediumint(8) unsigned NOT NULL auto_increment, object_name varchar(255) NOT NULL, object_value longblob, last_updated datetime NOT NULL default '0000-00-00', PRIMARY KEY id (id), UNIQUE KEY object_name (object_name) ) ENGINE=MyISAM;
-- a table for missing citations. This should be scanned occasionally by a
-- program to check whether some publication has been cited more than
-- 50 times (or so), and alert cataloguers to create a record for that
-- external citation.
--
-- id_bibrec is the id of the record. extcitepubinfo is publication info
-- that in general looks like hep-th/0112088
CREATE TABLE IF NOT EXISTS rnkCITATIONDATAEXT ( id_bibrec int(8) unsigned, extcitepubinfo varchar(255) NOT NULL, PRIMARY KEY (id_bibrec, extcitepubinfo), KEY extcitepubinfo (extcitepubinfo) ) ENGINE=MyISAM;
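-- A minimal version of the scan described above, reporting external
-- publications cited more than 50 times (the threshold comes from the
-- comment; tune it as needed):
--
--   SELECT extcitepubinfo, COUNT(*) AS nb_cites
--     FROM rnkCITATIONDATAEXT
--    GROUP BY extcitepubinfo
--   HAVING COUNT(*) > 50
--    ORDER BY nb_cites DESC;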
-- tables for collections and collection tree:
CREATE TABLE IF NOT EXISTS collection ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, dbquery text, nbrecs int(10) unsigned default '0', reclist longblob, PRIMARY KEY (id), UNIQUE KEY name (name), KEY dbquery (dbquery(50)) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS collectionname ( id_collection mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_collection,ln,type) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS collection_collection ( id_dad mediumint(9) unsigned NOT NULL, id_son mediumint(9) unsigned NOT NULL, type char(1) NOT NULL default 'r', score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_dad,id_son) ) ENGINE=MyISAM;
-- tables for OAI sets:
CREATE TABLE IF NOT EXISTS oaiREPOSITORY ( id mediumint(9) unsigned NOT NULL auto_increment, setName varchar(255) NOT NULL default '', setSpec varchar(255) NOT NULL default 'GLOBAL_SET', setCollection varchar(255) NOT NULL default '', setDescription text NOT NULL default '', setDefinition text NOT NULL default '', setRecList longblob, p1 text NOT NULL default '', f1 text NOT NULL default '', m1 text NOT NULL default '', p2 text NOT NULL default '', f2 text NOT NULL default '', m2 text NOT NULL default '', p3 text NOT NULL default '', f3 text NOT NULL default '', m3 text NOT NULL default '', PRIMARY KEY (id) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS oaiHARVEST ( id mediumint(9) unsigned NOT NULL auto_increment, baseurl varchar(255) NOT NULL default '', metadataprefix varchar(255) NOT NULL default 'oai_dc', arguments text, comment text, bibconvertcfgfile varchar(255), name varchar(255) NOT NULL, lastrun datetime, frequency mediumint(12) NOT NULL default '0', postprocess varchar(20) NOT NULL default 'h', bibfilterprogram varchar(255) NOT NULL default '', setspecs text NOT NULL default '', PRIMARY KEY (id) ) ENGINE=MyISAM;
CREATE TABLE IF NOT EXISTS oaiHARVESTLOG (
  id_oaiHARVEST mediumint(9) unsigned NOT NULL REFERENCES oaiHARVEST, -- source we harvest from
  id_bibrec mediumint(8) unsigned NOT NULL default '0', -- internal record id (filled by bibupload)
  bibupload_task_id int NOT NULL default 0, -- bibupload task number
  oai_id varchar(40) NOT NULL default "", -- OAI record identifier we harvested
  date_harvested datetime NOT NULL default '0000-00-00', -- when we harvested
  date_inserted datetime NOT NULL default '0000-00-00', -- when it was inserted
  inserted_to_db char(1) NOT NULL default 'P', -- where it was inserted (P=prod, H=holding-pen, etc.)
  PRIMARY KEY (bibupload_task_id, oai_id, date_harvested)
) ENGINE=MyISAM;
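-- The column comments above make oaiHARVESTLOG easy to audit. A sketch of
-- listing what a given source sent to the holding pen (source ID 1 is a
-- hypothetical example; 'H' is the holding-pen marker documented above):
--
--   SELECT oai_id, id_bibrec, date_harvested, date_inserted
--     FROM oaiHARVESTLOG
--    WHERE id_oaiHARVEST = 1 AND inserted_to_db = 'H'
--    ORDER BY date_harvested DESC;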
CREATE TABLE IF NOT EXISTS bibHOLDINGPEN ( changeset_id INT NOT NULL AUTO_INCREMENT, -- the identifier of the changeset stored in the holding pen changeset_date datetime NOT NULL DEFAULT '0000:00:00 00:00:00', -- when was the changeset inserted changeset_xml TEXT NOT NULL DEFAULT '', oai_id varchar(40) NOT NULL DEFAULT '', -- OAI identifier of concerned record id_bibrec mediumint(8) unsigned NOT NULL default '0', -- record ID of concerned record (filled by bibupload) PRIMARY KEY (changeset_id), KEY changeset_date (changeset_date), KEY id_bibrec (id_bibrec) ) ENGINE=MyISAM; -- tables for portal elements: CREATE TABLE IF NOT EXISTS collection_portalbox ( id_collection mediumint(9) unsigned NOT NULL, id_portalbox mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', position char(3) NOT NULL default 'top', score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_portalbox,ln) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS portalbox ( id mediumint(9) unsigned NOT NULL auto_increment, title text NOT NULL, body text NOT NULL, UNIQUE KEY id (id) ) ENGINE=MyISAM; -- tables for search examples: CREATE TABLE IF NOT EXISTS collection_example ( id_collection mediumint(9) unsigned NOT NULL, id_example mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_example) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS example ( id mediumint(9) unsigned NOT NULL auto_increment, type text NOT NULL default '', body text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- tables for collection formats: CREATE TABLE IF NOT EXISTS collection_format ( id_collection mediumint(9) unsigned NOT NULL, id_format mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection,id_format) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS format ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, code varchar(6) NOT NULL, description varchar(255) default '', content_type varchar(255) default '', visibility tinyint NOT NULL default '1', PRIMARY KEY (id), UNIQUE KEY code (code) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS formatname ( id_format mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_format,ln,type) ) ENGINE=MyISAM; -- tables for collection detailed page options CREATE TABLE IF NOT EXISTS collectiondetailedrecordpagetabs ( id_collection mediumint(9) unsigned NOT NULL, tabs varchar(255) NOT NULL default '', PRIMARY KEY (id_collection) ) ENGINE=MyISAM; -- tables for search options and MARC tags: CREATE TABLE IF NOT EXISTS collection_field_fieldvalue ( id_collection mediumint(9) unsigned NOT NULL, id_field mediumint(9) unsigned NOT NULL, id_fieldvalue mediumint(9) unsigned, type char(3) NOT NULL default 'src', score tinyint(4) unsigned NOT NULL default '0', score_fieldvalue tinyint(4) unsigned NOT NULL default '0', KEY id_collection (id_collection), KEY id_field (id_field), KEY id_fieldvalue (id_fieldvalue) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS field ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, code varchar(255) NOT NULL, PRIMARY KEY (id), UNIQUE KEY code (code) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS fieldname ( id_field mediumint(9) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_field,ln,type) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS fieldvalue ( id 
mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, value text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS field_tag ( id_field mediumint(9) unsigned NOT NULL, id_tag mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_field,id_tag) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS tag ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, value char(6) NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- tables for file management CREATE TABLE IF NOT EXISTS bibdoc ( id mediumint(9) unsigned NOT NULL auto_increment, status text NOT NULL default '', docname varchar(250) COLLATE utf8_bin NOT NULL default 'file', creation_date datetime NOT NULL default '0000-00-00', modification_date datetime NOT NULL default '0000-00-00', text_extraction_date datetime NOT NULL default '0000-00-00', more_info mediumblob NULL default NULL, PRIMARY KEY (id), KEY docname (docname), KEY creation_date (creation_date), KEY modification_date (modification_date) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibrec_bibdoc ( id_bibrec mediumint(9) unsigned NOT NULL default '0', id_bibdoc mediumint(9) unsigned NOT NULL default '0', type varchar(255), KEY (id_bibrec), KEY (id_bibdoc) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibdoc_bibdoc ( id_bibdoc1 mediumint(9) unsigned NOT NULL, id_bibdoc2 mediumint(9) unsigned NOT NULL, type varchar(255), KEY (id_bibdoc1), KEY (id_bibdoc2) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bibdocfsinfo ( id_bibdoc mediumint(9) unsigned NOT NULL, version tinyint(4) unsigned NOT NULL, format varchar(50) NOT NULL, last_version boolean NOT NULL, cd datetime NOT NULL, md datetime NOT NULL, checksum char(32) NOT NULL, filesize bigint(15) unsigned NOT NULL, mime varchar(100) NOT NULL, master_format varchar(50) NULL default NULL, PRIMARY KEY (id_bibdoc, version, format), KEY (last_version), KEY (format), KEY (cd), KEY (md), KEY (filesize), KEY (mime) ) ENGINE=MyISAM; -- tables for publication requests: CREATE TABLE IF NOT EXISTS publreq ( id int(11) NOT NULL auto_increment, host varchar(255) NOT NULL default '', date varchar(255) NOT NULL default '', name varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', address text NOT NULL, publication text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- table for sessions and users: CREATE TABLE IF NOT EXISTS session ( session_key varchar(32) NOT NULL default '', session_expiry datetime NOT NULL default '0000-00-00 00:00:00', session_object longblob, uid int(15) unsigned NOT NULL, UNIQUE KEY session_key (session_key), KEY uid (uid), KEY session_expiry (session_expiry) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user ( id int(15) unsigned NOT NULL auto_increment, email varchar(255) NOT NULL default '', password blob NOT NULL, note varchar(255) default NULL, settings blob default NULL, nickname varchar(255) NOT NULL default '', last_login datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY id (id), KEY email (email), KEY nickname (nickname) ) ENGINE=MyISAM; +CREATE TABLE IF NOT EXISTS userEXT ( + id varbinary(255) NOT NULL, + method varchar(50) NOT NULL, + id_user int(15) unsigned NOT NULL, + PRIMARY KEY (id, method), + UNIQUE KEY (id_user, method) +) ENGINE=MyISAM; + + -- tables for usergroups CREATE TABLE IF NOT EXISTS usergroup ( id int(15) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL default '', description text default '', join_policy char(2) NOT NULL default '', login_method varchar(255) NOT NULL default 
'INTERNAL', PRIMARY KEY (id), UNIQUE KEY login_method_name (login_method(70), name), KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_usergroup ( id_user int(15) unsigned NOT NULL default '0', id_usergroup int(15) unsigned NOT NULL default '0', user_status char(1) NOT NULL default '', user_status_date datetime NOT NULL default '0000-00-00 00:00:00', KEY id_user (id_user), KEY id_usergroup (id_usergroup) ) ENGINE=MyISAM; -- tables for access control engine CREATE TABLE IF NOT EXISTS accROLE ( id int(15) unsigned NOT NULL auto_increment, name varchar(32), description varchar(255), firerole_def_ser blob NULL, firerole_def_src text NULL, PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_accROLE ( id_user int(15) unsigned NOT NULL, id_accROLE int(15) unsigned NOT NULL, expiration datetime NOT NULL default '9999-12-31 23:59:59', PRIMARY KEY (id_user, id_accROLE) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS accMAILCOOKIE ( id int(15) unsigned NOT NULL auto_increment, data blob NOT NULL, expiration datetime NOT NULL default '9999-12-31 23:59:59', kind varchar(32) NOT NULL, onetime boolean NOT NULL default 0, status char(1) NOT NULL default 'W', PRIMARY KEY (id), KEY expiration (expiration) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS accACTION ( id int(15) unsigned NOT NULL auto_increment, name varchar(32), description varchar(255), allowedkeywords varchar(255), optional ENUM ('yes', 'no') NOT NULL default 'no', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS accARGUMENT ( id int(15) unsigned NOT NULL auto_increment, keyword varchar (32), value varchar(255), PRIMARY KEY (id), KEY KEYVAL (keyword, value) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS accROLE_accACTION_accARGUMENT ( id_accROLE int(15), id_accACTION int(15), id_accARGUMENT int(15), argumentlistid mediumint(8), KEY id_accROLE (id_accROLE), KEY id_accACTION (id_accACTION), KEY id_accARGUMENT (id_accARGUMENT) ) ENGINE=MyISAM; -- tables for personal/collaborative features (baskets, alerts, searches, messages, usergroups): CREATE TABLE IF NOT EXISTS user_query ( id_user int(15) unsigned NOT NULL default '0', id_query int(15) unsigned NOT NULL default '0', hostname varchar(50) default 'unknown host', date datetime default NULL, KEY id_user (id_user,id_query) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS query ( id int(15) unsigned NOT NULL auto_increment, type char(1) NOT NULL default 'r', urlargs text NOT NULL, PRIMARY KEY (id), KEY urlargs (urlargs(100)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_query_basket ( id_user int(15) unsigned NOT NULL default '0', id_query int(15) unsigned NOT NULL default '0', id_basket int(15) unsigned NOT NULL default '0', frequency varchar(5) NOT NULL default '', date_creation date default NULL, date_lastrun date default '0000-00-00', alert_name varchar(30) NOT NULL default '', alert_desc text default NULL, notification char(1) NOT NULL default 'y', PRIMARY KEY (id_user,id_query,frequency,id_basket), KEY alert_name (alert_name) ) ENGINE=MyISAM; -- baskets CREATE TABLE IF NOT EXISTS bskBASKET ( id int(15) unsigned NOT NULL auto_increment, id_owner int(15) unsigned NOT NULL default '0', name varchar(50) NOT NULL default '', date_modification datetime NOT NULL default '0000-00-00 00:00:00', nb_views int(15) NOT NULL default '0', PRIMARY KEY (id), KEY id_owner (id_owner), KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bskREC ( id_bibrec_or_bskEXTREC int(16) NOT NULL default '0', id_bskBASKET int(15) unsigned 
NOT NULL default '0', id_user_who_added_item int(15) NOT NULL default '0', score int(15) NOT NULL default '0', date_added datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id_bibrec_or_bskEXTREC,id_bskBASKET), KEY id_bibrec_or_bskEXTREC (id_bibrec_or_bskEXTREC), KEY id_bskBASKET (id_bskBASKET), KEY score (score), KEY date_added (date_added) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bskEXTREC ( id int(15) unsigned NOT NULL auto_increment, external_id int(15) NOT NULL default '0', collection_id int(15) unsigned NOT NULL default '0', original_url text, creation_date datetime NOT NULL default '0000-00-00 00:00:00', modification_date datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bskEXTFMT ( id int(15) unsigned NOT NULL auto_increment, id_bskEXTREC int(15) unsigned NOT NULL default '0', format varchar(10) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', value longblob, PRIMARY KEY (id), KEY id_bskEXTREC (id_bskEXTREC), KEY format (format) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_bskBASKET ( id_user int(15) unsigned NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', topic varchar(50) NOT NULL default '', PRIMARY KEY (id_user,id_bskBASKET), KEY id_user (id_user), KEY id_bskBASKET (id_bskBASKET) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS usergroup_bskBASKET ( id_usergroup int(15) unsigned NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', topic varchar(50) NOT NULL default '', date_shared datetime NOT NULL default '0000-00-00 00:00:00', share_level char(2) NOT NULL default '', PRIMARY KEY (id_usergroup,id_bskBASKET), KEY id_usergroup (id_usergroup), KEY id_bskBASKET (id_bskBASKET) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bskRECORDCOMMENT ( id int(15) unsigned NOT NULL auto_increment, id_bibrec_or_bskEXTREC int(16) NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', id_user int(15) unsigned NOT NULL default '0', title varchar(255) NOT NULL default '', body text NOT NULL, date_creation datetime NOT NULL default '0000-00-00 00:00:00', priority int(15) NOT NULL default '0', in_reply_to_id_bskRECORDCOMMENT int(15) unsigned NOT NULL default '0', reply_order_cached_data blob NULL default NULL, PRIMARY KEY (id), KEY id_bskBASKET (id_bskBASKET), KEY id_bibrec_or_bskEXTREC (id_bibrec_or_bskEXTREC), KEY date_creation (date_creation), KEY in_reply_to_id_bskRECORDCOMMENT (in_reply_to_id_bskRECORDCOMMENT), INDEX (reply_order_cached_data(40)) ) ENGINE=MyISAM; -- tables for messaging system CREATE TABLE IF NOT EXISTS msgMESSAGE ( id int(15) unsigned NOT NULL auto_increment, id_user_from int(15) unsigned NOT NULL default '0', sent_to_user_nicks text NOT NULL default '', sent_to_group_names text NOT NULL default '', subject text NOT NULL default '', body text default NULL, sent_date datetime NOT NULL default '0000-00-00 00:00:00', received_date datetime NULL default '0000-00-00 00:00:00', PRIMARY KEY id (id), KEY id_user_from (id_user_from) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_msgMESSAGE ( id_user_to int(15) unsigned NOT NULL default '0', id_msgMESSAGE int(15) unsigned NOT NULL default '0', status char(1) NOT NULL default 'N', PRIMARY KEY id (id_user_to, id_msgMESSAGE), KEY id_user_to (id_user_to), KEY id_msgMESSAGE (id_msgMESSAGE) ) ENGINE=MyISAM; -- tables for WebComment CREATE TABLE IF NOT EXISTS cmtRECORDCOMMENT ( id int(15) unsigned NOT NULL auto_increment, id_bibrec int(15) unsigned NOT NULL default '0', 
id_user int(15) unsigned NOT NULL default '0', title varchar(255) NOT NULL default '', body text NOT NULL default '', date_creation datetime NOT NULL default '0000-00-00 00:00:00', star_score tinyint(5) unsigned NOT NULL default '0', nb_votes_yes int(10) NOT NULL default '0', nb_votes_total int(10) unsigned NOT NULL default '0', nb_abuse_reports int(10) NOT NULL default '0', status char(2) NOT NULL default 'ok', round_name varchar(255) NOT NULL default '', restriction varchar(50) NOT NULL default '', in_reply_to_id_cmtRECORDCOMMENT int(15) unsigned NOT NULL default '0', reply_order_cached_data blob NULL default NULL, PRIMARY KEY (id), KEY id_bibrec (id_bibrec), KEY id_user (id_user), KEY status (status), KEY in_reply_to_id_cmtRECORDCOMMENT (in_reply_to_id_cmtRECORDCOMMENT), INDEX (reply_order_cached_data(40)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS cmtACTIONHISTORY ( id_cmtRECORDCOMMENT int(15) unsigned NULL, id_bibrec int(15) unsigned NULL, id_user int(15) unsigned NULL default NULL, client_host int(10) unsigned default NULL, action_time datetime NOT NULL default '0000-00-00 00:00:00', action_code char(1) NOT NULL, KEY id_cmtRECORDCOMMENT (id_cmtRECORDCOMMENT), KEY client_host (client_host), KEY id_user (id_user), KEY action_code (action_code) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS cmtSUBSCRIPTION ( id_bibrec mediumint(8) unsigned NOT NULL, id_user int(15) unsigned NOT NULL, creation_time datetime NOT NULL default '0000-00-00 00:00:00', KEY id_user (id_bibrec, id_user) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS cmtCOLLAPSED ( id_bibrec int(15) unsigned NOT NULL default '0', id_cmtRECORDCOMMENT int(15) unsigned NULL, id_user int(15) unsigned NOT NULL, PRIMARY KEY (id_user, id_bibrec, id_cmtRECORDCOMMENT) ) ENGINE=MyISAM; -- tables for BibKnowledge: CREATE TABLE IF NOT EXISTS knwKB ( id mediumint(8) unsigned NOT NULL auto_increment, name varchar(255) default '', description text default '', kbtype char default NULL, PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS knwKBRVAL ( id mediumint(8) unsigned NOT NULL auto_increment, m_key varchar(255) NOT NULL default '', m_value text NOT NULL default '', id_knwKB mediumint(8) NOT NULL default '0', PRIMARY KEY (id), KEY id_knwKB (id_knwKB), KEY m_key (m_key(30)), KEY m_value (m_value(30)) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS knwKBDDEF ( id_knwKB mediumint(8) unsigned NOT NULL, id_collection mediumint(9), output_tag text default '', search_expression text default '', PRIMARY KEY (id_knwKB) ) ENGINE=MyISAM; -- tables for WebSubmit: CREATE TABLE IF NOT EXISTS sbmACTION ( lactname text, sactname char(3) NOT NULL default '', dir text, cd date default NULL, md date default NULL, actionbutton text, statustext text, PRIMARY KEY (sactname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmALLFUNCDESCR ( function varchar(40) NOT NULL default '', description tinytext, PRIMARY KEY (function) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmAPPROVAL ( doctype varchar(10) NOT NULL default '', categ varchar(50) NOT NULL default '', rn varchar(50) NOT NULL default '', status varchar(10) NOT NULL default '', dFirstReq datetime NOT NULL default '0000-00-00 00:00:00', dLastReq datetime NOT NULL default '0000-00-00 00:00:00', dAction datetime NOT NULL default '0000-00-00 00:00:00', access varchar(20) NOT NULL default '0', note text NOT NULL default '', PRIMARY KEY (rn) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCPLXAPPROVAL ( doctype varchar(10) NOT NULL default '', categ varchar(50) NOT NULL default '', rn 
varchar(50) NOT NULL default '', type varchar(10) NOT NULL, status varchar(10) NOT NULL, id_group int(15) unsigned NOT NULL default '0', id_bskBASKET int(15) unsigned NOT NULL default '0', id_EdBoardGroup int(15) unsigned NOT NULL default '0', dFirstReq datetime NOT NULL default '0000-00-00 00:00:00', dLastReq datetime NOT NULL default '0000-00-00 00:00:00', dEdBoardSel datetime NOT NULL default '0000-00-00 00:00:00', dRefereeSel datetime NOT NULL default '0000-00-00 00:00:00', dRefereeRecom datetime NOT NULL default '0000-00-00 00:00:00', dEdBoardRecom datetime NOT NULL default '0000-00-00 00:00:00', dPubComRecom datetime NOT NULL default '0000-00-00 00:00:00', dProjectLeaderAction datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (rn, type) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOLLECTION ( id int(11) NOT NULL auto_increment, name varchar(100) NOT NULL default '', PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOLLECTION_sbmCOLLECTION ( id_father int(11) NOT NULL default '0', id_son int(11) NOT NULL default '0', catalogue_order int(11) NOT NULL default '0' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOLLECTION_sbmDOCTYPE ( id_father int(11) NOT NULL default '0', id_son char(10) NOT NULL default '0', catalogue_order int(11) NOT NULL default '0' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCATEGORIES ( doctype varchar(10) NOT NULL default '', sname varchar(75) NOT NULL default '', lname varchar(75) NOT NULL default '', score tinyint unsigned NOT NULL default 0, PRIMARY KEY (doctype, sname), KEY doctype (doctype), KEY sname (sname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCHECKS ( chname varchar(15) NOT NULL default '', chdesc text, cd date default NULL, md date default NULL, chefi1 text, chefi2 text, PRIMARY KEY (chname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmDOCTYPE ( ldocname text, sdocname varchar(10) default NULL, cd date default NULL, md date default NULL, description text ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFIELD ( subname varchar(13) default NULL, pagenb int(11) default NULL, fieldnb int(11) default NULL, fidesc varchar(15) default NULL, fitext text, level char(1) default NULL, sdesc text, checkn text, cd date default NULL, md date default NULL, fiefi1 text, fiefi2 text ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFIELDDESC ( name varchar(15) NOT NULL default '', alephcode varchar(50) default NULL, marccode varchar(50) NOT NULL default '', type char(1) default NULL, size int(11) default NULL, rows int(11) default NULL, cols int(11) default NULL, maxlength int(11) default NULL, val text, fidesc text, cd date default NULL, md date default NULL, modifytext text, fddfi2 text, cookie int(11) default '0', PRIMARY KEY (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFORMATEXTENSION ( FILE_FORMAT text NOT NULL, FILE_EXTENSION text NOT NULL ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFUNCTIONS ( action varchar(10) NOT NULL default '', doctype varchar(10) NOT NULL default '', function varchar(40) NOT NULL default '', score int(11) NOT NULL default '0', step tinyint(4) NOT NULL default '1' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmFUNDESC ( function varchar(40) NOT NULL default '', param varchar(40) default NULL ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmGFILERESULT ( FORMAT text NOT NULL, RESULT text NOT NULL ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmIMPLEMENT ( docname varchar(10) default NULL, actname char(3) default NULL, displayed char(1) default NULL, subname varchar(13) default NULL, nbpg int(11) 
default NULL, cd date default NULL, md date default NULL, buttonorder int(11) default NULL, statustext text, level char(1) NOT NULL default '', score int(11) NOT NULL default '0', stpage int(11) NOT NULL default '0', endtxt varchar(100) NOT NULL default '' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPARAMETERS ( doctype varchar(10) NOT NULL default '', name varchar(40) NOT NULL default '', value text NOT NULL default '', PRIMARY KEY (doctype,name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPUBLICATION ( doctype varchar(10) NOT NULL default '', categ varchar(50) NOT NULL default '', rn varchar(50) NOT NULL default '', status varchar(10) NOT NULL default '', dFirstReq datetime NOT NULL default '0000-00-00 00:00:00', dLastReq datetime NOT NULL default '0000-00-00 00:00:00', dAction datetime NOT NULL default '0000-00-00 00:00:00', accessref varchar(20) NOT NULL default '', accessedi varchar(20) NOT NULL default '', access varchar(20) NOT NULL default '', referees varchar(50) NOT NULL default '', authoremail varchar(50) NOT NULL default '', dRefSelection datetime NOT NULL default '0000-00-00 00:00:00', dRefRec datetime NOT NULL default '0000-00-00 00:00:00', dEdiRec datetime NOT NULL default '0000-00-00 00:00:00', accessspo varchar(20) NOT NULL default '', journal varchar(100) default NULL, PRIMARY KEY (doctype,categ,rn) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPUBLICATIONCOMM ( id int(11) NOT NULL auto_increment, id_parent int(11) default '0', rn varchar(100) NOT NULL default '', firstname varchar(100) default NULL, secondname varchar(100) default NULL, email varchar(100) default NULL, date varchar(40) NOT NULL default '', synopsis varchar(255) NOT NULL default '', commentfulltext text, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmPUBLICATIONDATA ( doctype varchar(10) NOT NULL default '', editoboard varchar(250) NOT NULL default '', base varchar(10) NOT NULL default '', logicalbase varchar(10) NOT NULL default '', spokesperson varchar(50) NOT NULL default '', PRIMARY KEY (doctype) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmREFEREES ( doctype varchar(10) NOT NULL default '', categ varchar(10) NOT NULL default '', name varchar(50) NOT NULL default '', address varchar(50) NOT NULL default '', rid int(11) NOT NULL auto_increment, PRIMARY KEY (rid) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmSUBMISSIONS ( email varchar(50) NOT NULL default '', doctype varchar(10) NOT NULL default '', action varchar(10) NOT NULL default '', status varchar(10) NOT NULL default '', id varchar(30) NOT NULL default '', reference varchar(40) NOT NULL default '', cd datetime NOT NULL default '0000-00-00 00:00:00', md datetime NOT NULL default '0000-00-00 00:00:00' ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS sbmCOOKIES ( id int(15) unsigned NOT NULL auto_increment, name varchar(100) NOT NULL, value text, uid int(15) NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- Scheduler tables CREATE TABLE IF NOT EXISTS schTASK ( id int(15) unsigned NOT NULL auto_increment, proc varchar(255) NOT NULL, host varchar(255) NOT NULL default '', user varchar(50) NOT NULL, runtime datetime NOT NULL, sleeptime varchar(20), arguments mediumblob, status varchar(50), progress varchar(255), priority tinyint(4) NOT NULL default 0, sequenceid int(8) NULL default NULL, PRIMARY KEY (id), KEY status (status), KEY runtime (runtime), KEY priority (priority), KEY sequenceid (sequenceid) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS hstTASK ( id int(15) unsigned NOT NULL, proc varchar(255) NOT NULL, host varchar(255) NOT 
NULL default '', user varchar(50) NOT NULL, runtime datetime NOT NULL, sleeptime varchar(20), arguments mediumblob, status varchar(50), progress varchar(255), priority tinyint(4) NOT NULL default 0, sequenceid int(8) NULL default NULL, PRIMARY KEY (id), KEY status (status), KEY runtime (runtime), KEY priority (priority), KEY sequenceid (sequenceid) ) ENGINE=MyISAM; -- Batch Upload History CREATE TABLE IF NOT EXISTS hstBATCHUPLOAD ( id int(15) unsigned NOT NULL auto_increment, user varchar(50) NOT NULL, submitdate datetime NOT NULL, filename varchar(255) NOT NULL, execdate datetime NOT NULL, id_schTASK int(15) unsigned NOT NULL, batch_mode varchar(15) NOT NULL, PRIMARY KEY (id), KEY user (user) ) ENGINE=MyISAM; -- External collections CREATE TABLE IF NOT EXISTS collection_externalcollection ( id_collection mediumint(9) unsigned NOT NULL default '0', id_externalcollection mediumint(9) unsigned NOT NULL default '0', type tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection, id_externalcollection) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS externalcollection ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL default '', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; -- WebStat tables: CREATE TABLE IF NOT EXISTS staEVENT ( id varchar(255) NOT NULL, number smallint(2) unsigned ZEROFILL NOT NULL auto_increment, name varchar(255), creation_time TIMESTAMP DEFAULT NOW(), cols varchar(255), PRIMARY KEY (id), UNIQUE KEY number (number) ) ENGINE=MyISAM; -- BibClassify tables: CREATE TABLE IF NOT EXISTS clsMETHOD ( id mediumint(9) unsigned NOT NULL, name varchar(50) NOT NULL default '', location varchar(255) NOT NULL default '', description varchar(255) NOT NULL default '', last_updated datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS collection_clsMETHOD ( id_collection mediumint(9) unsigned NOT NULL, id_clsMETHOD mediumint(9) unsigned NOT NULL, PRIMARY KEY (id_collection, id_clsMETHOD) ) ENGINE=MyISAM; -- WebJournal tables: CREATE TABLE IF NOT EXISTS jrnJOURNAL ( id mediumint(9) unsigned NOT NULL auto_increment, name varchar(50) NOT NULL default '', PRIMARY KEY (id), UNIQUE KEY name (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS jrnISSUE ( id_jrnJOURNAL mediumint(9) unsigned NOT NULL, issue_number varchar(50) NOT NULL default '', issue_display varchar(50) NOT NULL default '', date_released datetime NOT NULL default '0000-00-00 00:00:00', date_announced datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id_jrnJOURNAL,issue_number) ) ENGINE=MyISAM; -- tables recording history of record's metadata and fulltext documents: CREATE TABLE IF NOT EXISTS hstRECORD ( id_bibrec mediumint(8) unsigned NOT NULL, marcxml blob NOT NULL, job_id mediumint(15) unsigned NOT NULL, job_name varchar(255) NOT NULL, job_person varchar(255) NOT NULL, job_date datetime NOT NULL, job_details blob NOT NULL, KEY (id_bibrec), KEY (job_id), KEY (job_name), KEY (job_person), KEY (job_date) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS hstDOCUMENT ( id_bibdoc mediumint(9) unsigned NOT NULL, docname varchar(250) NOT NULL, docformat varchar(50) NOT NULL, docversion tinyint(4) unsigned NOT NULL, docsize bigint(15) unsigned NOT NULL, docchecksum char(32) NOT NULL, doctimestamp datetime NOT NULL, action varchar(50) NOT NULL, job_id mediumint(15) unsigned NULL default NULL, job_name varchar(255) NULL default NULL, job_person varchar(255) NULL default NULL, job_date datetime NULL default 
NULL, job_details blob NULL default NULL, KEY (action), KEY (id_bibdoc), KEY (docname), KEY (docformat), KEY (doctimestamp), KEY (job_id), KEY (job_name), KEY (job_person), KEY (job_date) ) ENGINE=MyISAM; -- BibCirculation tables: CREATE TABLE IF NOT EXISTS crcBORROWER ( id int(15) unsigned NOT NULL auto_increment, ccid int(15) unsigned NULL default NULL, name varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', phone varchar(60) default NULL, address varchar(60) default NULL, mailbox varchar(30) default NULL, borrower_since datetime NOT NULL default '0000-00-00 00:00:00', borrower_until datetime NOT NULL default '0000-00-00 00:00:00', notes text, PRIMARY KEY (id), UNIQUE KEY (ccid), KEY (name), KEY (email) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcILLREQUEST ( id int(15) unsigned NOT NULL auto_increment, id_crcBORROWER int(15) unsigned NOT NULL default '0', barcode varchar(30) NOT NULL default '', period_of_interest_from datetime NOT NULL default '0000-00-00 00:00:00', period_of_interest_to datetime NOT NULL default '0000-00-00 00:00:00', id_crcLIBRARY int(15) unsigned NOT NULL default '0', request_date datetime NOT NULL default '0000-00-00 00:00:00', expected_date datetime NOT NULL default '0000-00-00 00:00:00', arrival_date datetime NOT NULL default '0000-00-00 00:00:00', due_date datetime NOT NULL default '0000-00-00 00:00:00', return_date datetime NOT NULL default '0000-00-00 00:00:00', status varchar(20) NOT NULL default '', cost varchar(30) NOT NULL default '', budget_code varchar(60) NOT NULL default '', item_info text, request_type text, borrower_comments text, only_this_edition varchar(10) NOT NULL default '', library_notes text, PRIMARY KEY (id), KEY id_crcborrower (id_crcBORROWER), KEY id_crclibrary (id_crcLIBRARY) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcITEM ( barcode varchar(30) NOT NULL default '', id_bibrec int(15) unsigned NOT NULL default '0', id_crcLIBRARY int(15) unsigned NOT NULL default '0', collection varchar(60) default NULL, location varchar(60) default NULL, description varchar(60) default NULL, loan_period varchar(30) NOT NULL default '', status varchar(20) NOT NULL default '', expected_arrival_date varchar(60) NOT NULL default '', creation_date datetime NOT NULL default '0000-00-00 00:00:00', modification_date datetime NOT NULL default '0000-00-00 00:00:00', number_of_requests int(3) unsigned NOT NULL default '0', PRIMARY KEY (barcode), KEY id_bibrec (id_bibrec), KEY id_crclibrary (id_crcLIBRARY) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcLIBRARY ( id int(15) unsigned NOT NULL auto_increment, name varchar(80) NOT NULL default '', address varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', phone varchar(30) NOT NULL default '', type varchar(30) default NULL, notes text, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcLOAN ( id int(15) unsigned NOT NULL auto_increment, id_crcBORROWER int(15) unsigned NOT NULL default '0', id_bibrec int(15) unsigned NOT NULL default '0', barcode varchar(30) NOT NULL default '', loaned_on datetime NOT NULL default '0000-00-00 00:00:00', returned_on date NOT NULL default '0000-00-00', due_date datetime NOT NULL default '0000-00-00 00:00:00', number_of_renewals int(3) unsigned NOT NULL default '0', overdue_letter_number int(3) unsigned NOT NULL default '0', overdue_letter_date datetime NOT NULL default '0000-00-00 00:00:00', status varchar(20) NOT NULL default '', type varchar(20) NOT NULL default '', notes text, PRIMARY KEY (id), KEY id_crcborrower 
(id_crcBORROWER), KEY id_bibrec (id_bibrec), KEY barcode (barcode) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcLOANREQUEST ( id int(15) unsigned NOT NULL auto_increment, id_crcBORROWER int(15) unsigned NOT NULL default '0', id_bibrec int(15) unsigned NOT NULL default '0', barcode varchar(30) NOT NULL default '', period_of_interest_from datetime NOT NULL default '0000-00-00 00:00:00', period_of_interest_to datetime NOT NULL default '0000-00-00 00:00:00', status varchar(20) NOT NULL default '', notes text, request_date datetime NOT NULL default '0000-00-00 00:00:00', PRIMARY KEY (id), KEY id_crcborrower (id_crcBORROWER), KEY id_bibrec (id_bibrec), KEY barcode (barcode) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcPURCHASE ( id int(15) unsigned NOT NULL auto_increment, id_bibrec int(15) unsigned NOT NULL default '0', id_crcVENDOR int(15) unsigned NOT NULL default '0', ordered_date datetime NOT NULL default '0000-00-00 00:00:00', expected_date datetime NOT NULL default '0000-00-00 00:00:00', price varchar(20) NOT NULL default '0', status varchar(20) NOT NULL default '', notes text, PRIMARY KEY (id), KEY id_bibrec (id_bibrec), KEY id_crcVENDOR (id_crcVENDOR) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS crcVENDOR ( id int(15) unsigned NOT NULL auto_increment, name varchar(80) NOT NULL default '', address varchar(255) NOT NULL default '', email varchar(255) NOT NULL default '', phone varchar(30) NOT NULL default '', notes text, PRIMARY KEY (id) ) ENGINE=MyISAM; -- BibExport tables: CREATE TABLE IF NOT EXISTS expJOB ( id int(15) unsigned NOT NULL auto_increment, jobname varchar(50) NOT NULL default '', jobfreq mediumint(12) NOT NULL default '0', output_format mediumint(12) NOT NULL default '0', deleted mediumint(12) NOT NULL default '0', lastrun datetime NOT NULL default '0000-00-00 00:00:00', output_directory text, PRIMARY KEY (id), UNIQUE KEY jobname (jobname) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expQUERY ( id int(15) unsigned NOT NULL auto_increment, name varchar(255) NOT NULL, search_criteria text NOT NULL, output_fields text NOT NULL, notes text, deleted mediumint(12) NOT NULL default '0', PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expJOB_expQUERY ( id_expJOB int(15) NOT NULL, id_expQUERY int(15) NOT NULL, PRIMARY KEY (id_expJOB,id_expQUERY), KEY id_expJOB (id_expJOB), KEY id_expQUERY (id_expQUERY) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expQUERYRESULT ( id int(15) unsigned NOT NULL auto_increment, id_expQUERY int(15) NOT NULL, result text NOT NULL, status mediumint(12) NOT NULL default '0', status_message text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expJOBRESULT ( id int(15) unsigned NOT NULL auto_increment, id_expJOB int(15) NOT NULL, execution_time datetime NOT NULL default '0000-00-00 00:00:00', status mediumint(12) NOT NULL default '0', status_message text NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS expJOBRESULT_expQUERYRESULT ( id_expJOBRESULT int(15) NOT NULL, id_expQUERYRESULT int(15) NOT NULL, PRIMARY KEY (id_expJOBRESULT, id_expQUERYRESULT), KEY id_expJOBRESULT (id_expJOBRESULT), KEY id_expQUERYRESULT (id_expQUERYRESULT) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS user_expJOB ( id_user int(15) NOT NULL, id_expJOB int(15) NOT NULL, PRIMARY KEY (id_user, id_expJOB), KEY id_user (id_user), KEY id_expJOB (id_expJOB) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS swrREMOTESERVER ( id int(15) unsigned NOT NULL auto_increment, name varchar(50) unique NOT NULL, host varchar(50) NOT NULL, username 
varchar(50) NOT NULL, password varchar(50) NOT NULL, email varchar(50) NOT NULL, realm varchar(50) NOT NULL, url_base_record varchar(50) NOT NULL, url_servicedocument varchar(80) NOT NULL, xml_servicedocument longblob, last_update int(15) unsigned NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS swrCLIENTDATA ( id int(15) unsigned NOT NULL auto_increment, id_swrREMOTESERVER int(15) NOT NULL, id_record int(15) NOT NULL, report_no varchar(50) NOT NULL, id_remote varchar(50) NOT NULL, id_user int(15) NOT NULL, user_name varchar(100) NOT NULL, user_email varchar(100) NOT NULL, xml_media_deposit longblob NOT NULL, xml_metadata_submit longblob NOT NULL, submission_date datetime NOT NULL default '0000-00-00 00:00:00', publication_date datetime NOT NULL default '0000-00-00 00:00:00', removal_date datetime NOT NULL default '0000-00-00 00:00:00', link_medias varchar(150) NOT NULL, link_metadata varchar(150) NOT NULL, link_status varchar(150) NOT NULL, status varchar(150) NOT NULL default 'submitted', last_update datetime NOT NULL, PRIMARY KEY (id) ) ENGINE=MyISAM; -- tables for exception management -- This table is used to log exceptions -- to discover the full details of an exception either check the email -- that are sent to CFG_SITE_ADMIN_EMAIL or look into invenio.err CREATE TABLE IF NOT EXISTS hstEXCEPTION ( id int(15) unsigned NOT NULL auto_increment, name varchar(50) NOT NULL, -- name of the exception filename varchar(255) NULL, -- file where the exception was raised line int(9) NULL, -- line at which the exception was raised last_seen datetime NOT NULL default '0000-00-00 00:00:00', -- last time this exception has been seen last_notified datetime NOT NULL default '0000-00-00 00:00:00', -- last time this exception has been notified counter int(15) NOT NULL default 0, -- internal counter to decide when to notify this exception total int(15) NOT NULL default 0, -- total number of times this exception has been seen PRIMARY KEY (id), KEY (last_seen), KEY (last_notified), KEY (total), UNIQUE KEY (name(50), filename(255), line) ) ENGINE=MyISAM; -- tables for BibAuthorID module: CREATE TABLE IF NOT EXISTS `aidPERSONIDPAPERS` ( `personid` BIGINT( 16 ) UNSIGNED NOT NULL , `bibref_table` ENUM( '100', '700' ) NOT NULL , `bibref_value` MEDIUMINT( 8 ) UNSIGNED NOT NULL , `bibrec` MEDIUMINT( 8 ) UNSIGNED NOT NULL , `name` VARCHAR( 256 ) NOT NULL , `flag` SMALLINT( 2 ) NOT NULL DEFAULT '0' , `lcul` SMALLINT( 2 ) NOT NULL DEFAULT '0' , `last_updated` TIMESTAMP ON UPDATE CURRENT_TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP , INDEX `personid-b` (`personid`) , INDEX `reftable-b` (`bibref_table`) , INDEX `refvalue-b` (`bibref_value`) , INDEX `rec-b` (`bibrec`) , INDEX `name-b` (`name`) , INDEX `pn-b` (`personid`, `name`) , INDEX `timestamp-b` (`last_updated`) , INDEX `flag-b` (`flag`) , INDEX `ptvrf-b` (`personid`, `bibref_table`, `bibref_value`, `bibrec`, `flag`) ) ENGINE=MYISAM; CREATE TABLE IF NOT EXISTS `aidRESULTS` ( `personid` VARCHAR( 256 ) NOT NULL , `bibref_table` ENUM( '100', '700' ) NOT NULL , `bibref_value` MEDIUMINT( 8 ) UNSIGNED NOT NULL , `bibrec` MEDIUMINT( 8 ) UNSIGNED NOT NULL , INDEX `personid-b` (`personid`) , INDEX `reftable-b` (`bibref_table`) , INDEX `refvalue-b` (`bibref_value`) , INDEX `rec-b` (`bibrec`) ) ENGINE=MYISAM; CREATE TABLE IF NOT EXISTS `aidPERSONIDDATA` ( `personid` BIGINT( 16 ) UNSIGNED NOT NULL , `tag` VARCHAR( 64 ) NOT NULL , `data` VARCHAR( 256 ) NOT NULL , `opt1` MEDIUMINT( 8 ) NULL DEFAULT NULL , `opt2` MEDIUMINT( 8 ) NULL DEFAULT NULL , `opt3` 
VARCHAR( 256 ) NULL DEFAULT NULL , INDEX `personid-b` (`personid`) , INDEX `tag-b` (`tag`) , INDEX `data-b` (`data`) , INDEX `opt1` (`opt1`) ) ENGINE=MYISAM; CREATE TABLE IF NOT EXISTS `aidUSERINPUTLOG` ( `id` bigint(15) NOT NULL AUTO_INCREMENT, `transactionid` bigint(15) NOT NULL, `timestamp` datetime NOT NULL, + `userid` int, `userinfo` varchar(255) NOT NULL, `personid` bigint(15) NOT NULL, `action` varchar(50) NOT NULL, `tag` varchar(50) NOT NULL, `value` varchar(200) NOT NULL, `comment` text, PRIMARY KEY (`id`), INDEX `transactionid-b` (`transactionid`), INDEX `timestamp-b` (`timestamp`), INDEX `userinfo-b` (`userinfo`), + INDEX `userid-b` (`userid`), INDEX `personid-b` (`personid`), INDEX `action-b` (`action`), INDEX `tag-b` (`tag`), INDEX `value-b` (`value`) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS `aidCACHE` ( `id` int(15) NOT NULL auto_increment, `object_name` varchar(120) NOT NULL, `object_key` varchar(120) NOT NULL, `object_value` text, `last_updated` datetime NOT NULL, PRIMARY KEY (`id`), INDEX `name-b` (`object_name`), INDEX `key-b` (`object_key`), INDEX `last_updated-b` (`last_updated`) ) ENGINE=MyISAM; -CREATE TABLE IF NOT EXISTS `aidPROBCACHE` ( - `cluster` VARCHAR( 256 ) NOT NULL , - `bibmap` MEDIUMBLOB NOT NULL , - `matrix` LONGBLOB NOT NULL , - PRIMARY KEY ( `cluster` ) -) ENGINE = MYISAM ; - -- refextract tables: CREATE TABLE IF NOT EXISTS `xtrJOB` ( `id` tinyint(4) NOT NULL AUTO_INCREMENT, `name` varchar(30) NOT NULL, `last_updated` datetime NOT NULL, PRIMARY KEY (`id`) ) ENGINE=MyISAM; -- tables for bibsort module CREATE TABLE IF NOT EXISTS bsrMETHOD ( id mediumint(8) unsigned NOT NULL auto_increment, name varchar(20) NOT NULL, definition varchar(255) NOT NULL, washer varchar(255) NOT NULL, PRIMARY KEY (id), UNIQUE KEY (name) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bsrMETHODNAME ( id_bsrMETHOD mediumint(8) unsigned NOT NULL, ln char(5) NOT NULL default '', type char(3) NOT NULL default 'sn', value varchar(255) NOT NULL, PRIMARY KEY (id_bsrMETHOD, ln, type) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bsrMETHODDATA ( id_bsrMETHOD mediumint(8) unsigned NOT NULL, data_dict longblob, data_dict_ordered longblob, data_list_sorted longblob, last_updated datetime, PRIMARY KEY (id_bsrMETHOD) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS bsrMETHODDATABUCKET ( id_bsrMETHOD mediumint(8) unsigned NOT NULL, bucket_no tinyint(2) NOT NULL, bucket_data longblob, bucket_last_value varchar(255), last_updated datetime, PRIMARY KEY (id_bsrMETHOD, bucket_no) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS collection_bsrMETHOD ( id_collection mediumint(9) unsigned NOT NULL, id_bsrMETHOD mediumint(9) unsigned NOT NULL, score tinyint(4) unsigned NOT NULL default '0', PRIMARY KEY (id_collection, id_bsrMETHOD) ) ENGINE=MyISAM; -- tables for sequence storage CREATE TABLE IF NOT EXISTS seqSTORE ( id int(15) NOT NULL auto_increment, seq_name varchar(15), seq_value varchar(20), PRIMARY KEY (id), UNIQUE KEY seq_name_value (seq_name, seq_value) ) ENGINE=MyISAM; -- tables for linkbacks: CREATE TABLE IF NOT EXISTS lnkENTRY ( id int(15) NOT NULL auto_increment, origin_url varchar(100) NOT NULL, -- url of the originating resource id_bibrec mediumint(8) unsigned NOT NULL, -- bibrecord additional_properties longblob, type varchar(30) NOT NULL, status varchar(30) NOT NULL default 'PENDING', insert_time datetime default '0000-00-00 00:00:00', PRIMARY KEY (id), INDEX (id_bibrec), INDEX (type), INDEX (status), INDEX (insert_time) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS lnkENTRYURLTITLE ( id int(15) unsigned 
NOT NULL auto_increment, url varchar(100) NOT NULL, title varchar(100) NOT NULL, manual_set boolean NOT NULL default 0, broken_count int(5) default 0, broken boolean NOT NULL default 0, PRIMARY KEY (id), UNIQUE (url), INDEX (title) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS lnkENTRYLOG ( id_lnkENTRY int(15) unsigned NOT NULL, id_lnkLOG int(15) unsigned NOT NULL, FOREIGN KEY (id_lnkENTRY) REFERENCES lnkENTRY(id), FOREIGN KEY (id_lnkLOG) REFERENCES lnkLOG(id) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS lnkLOG ( id int(15) unsigned NOT NULL auto_increment, id_user int(15) unsigned, action varchar(30) NOT NULL, log_time datetime default '0000-00-00 00:00:00', PRIMARY KEY (id), INDEX (id_user), INDEX (action), INDEX (log_time) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS lnkADMINURL ( id int(15) unsigned NOT NULL auto_increment, url varchar(100) NOT NULL, list varchar(30) NOT NULL, PRIMARY KEY (id), UNIQUE (url), INDEX (list) ) ENGINE=MyISAM; CREATE TABLE IF NOT EXISTS lnkADMINURLLOG ( id_lnkADMINURL int(15) unsigned NOT NULL, id_lnkLOG int(15) unsigned NOT NULL, FOREIGN KEY (id_lnkADMINURL) REFERENCES lnkADMINURL(id), FOREIGN KEY (id_lnkLOG) REFERENCES lnkLOG(id) ) ENGINE=MyISAM; +-- table for API key + +CREATE TABLE IF NOT EXISTS webapikey ( + id varchar(150) NOT NULL, + secret varchar(150) NOT NULL, + id_user int(15) NOT NULL, + status varchar(25) NOT NULL default 'OK', + description varchar(255) default NULL, + PRIMARY KEY (id), + KEY (id_user), + KEY (status) +) ENGINE=MyISAM; + -- end of file diff --git a/modules/miscutil/sql/tabdrop.sql b/modules/miscutil/sql/tabdrop.sql index 7122ee01e..80295c749 100644 --- a/modules/miscutil/sql/tabdrop.sql +++ b/modules/miscutil/sql/tabdrop.sql @@ -1,475 +1,475 @@ -- $Id$ -- This file is part of Invenio. -- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. -- -- Invenio is free software; you can redistribute it and/or -- modify it under the terms of the GNU General Public License as -- published by the Free Software Foundation; either version 2 of the -- License, or (at your option) any later version. -- -- Invenio is distributed in the hope that it will be useful, but -- WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with Invenio; if not, write to the Free Software Foundation, Inc., -- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
DROP TABLE IF EXISTS bibrec; DROP TABLE IF EXISTS bib00x; DROP TABLE IF EXISTS bib01x; DROP TABLE IF EXISTS bib02x; DROP TABLE IF EXISTS bib03x; DROP TABLE IF EXISTS bib04x; DROP TABLE IF EXISTS bib05x; DROP TABLE IF EXISTS bib06x; DROP TABLE IF EXISTS bib07x; DROP TABLE IF EXISTS bib08x; DROP TABLE IF EXISTS bib09x; DROP TABLE IF EXISTS bib10x; DROP TABLE IF EXISTS bib11x; DROP TABLE IF EXISTS bib12x; DROP TABLE IF EXISTS bib13x; DROP TABLE IF EXISTS bib14x; DROP TABLE IF EXISTS bib15x; DROP TABLE IF EXISTS bib16x; DROP TABLE IF EXISTS bib17x; DROP TABLE IF EXISTS bib18x; DROP TABLE IF EXISTS bib19x; DROP TABLE IF EXISTS bib20x; DROP TABLE IF EXISTS bib21x; DROP TABLE IF EXISTS bib22x; DROP TABLE IF EXISTS bib23x; DROP TABLE IF EXISTS bib24x; DROP TABLE IF EXISTS bib25x; DROP TABLE IF EXISTS bib26x; DROP TABLE IF EXISTS bib27x; DROP TABLE IF EXISTS bib28x; DROP TABLE IF EXISTS bib29x; DROP TABLE IF EXISTS bib30x; DROP TABLE IF EXISTS bib31x; DROP TABLE IF EXISTS bib32x; DROP TABLE IF EXISTS bib33x; DROP TABLE IF EXISTS bib34x; DROP TABLE IF EXISTS bib35x; DROP TABLE IF EXISTS bib36x; DROP TABLE IF EXISTS bib37x; DROP TABLE IF EXISTS bib38x; DROP TABLE IF EXISTS bib39x; DROP TABLE IF EXISTS bib40x; DROP TABLE IF EXISTS bib41x; DROP TABLE IF EXISTS bib42x; DROP TABLE IF EXISTS bib43x; DROP TABLE IF EXISTS bib44x; DROP TABLE IF EXISTS bib45x; DROP TABLE IF EXISTS bib46x; DROP TABLE IF EXISTS bib47x; DROP TABLE IF EXISTS bib48x; DROP TABLE IF EXISTS bib49x; DROP TABLE IF EXISTS bib50x; DROP TABLE IF EXISTS bib51x; DROP TABLE IF EXISTS bib52x; DROP TABLE IF EXISTS bib53x; DROP TABLE IF EXISTS bib54x; DROP TABLE IF EXISTS bib55x; DROP TABLE IF EXISTS bib56x; DROP TABLE IF EXISTS bib57x; DROP TABLE IF EXISTS bib58x; DROP TABLE IF EXISTS bib59x; DROP TABLE IF EXISTS bib60x; DROP TABLE IF EXISTS bib61x; DROP TABLE IF EXISTS bib62x; DROP TABLE IF EXISTS bib63x; DROP TABLE IF EXISTS bib64x; DROP TABLE IF EXISTS bib65x; DROP TABLE IF EXISTS bib66x; DROP TABLE IF EXISTS bib67x; DROP TABLE IF EXISTS bib68x; DROP TABLE IF EXISTS bib69x; DROP TABLE IF EXISTS bib70x; DROP TABLE IF EXISTS bib71x; DROP TABLE IF EXISTS bib72x; DROP TABLE IF EXISTS bib73x; DROP TABLE IF EXISTS bib74x; DROP TABLE IF EXISTS bib75x; DROP TABLE IF EXISTS bib76x; DROP TABLE IF EXISTS bib77x; DROP TABLE IF EXISTS bib78x; DROP TABLE IF EXISTS bib79x; DROP TABLE IF EXISTS bib80x; DROP TABLE IF EXISTS bib81x; DROP TABLE IF EXISTS bib82x; DROP TABLE IF EXISTS bib83x; DROP TABLE IF EXISTS bib84x; DROP TABLE IF EXISTS bib85x; DROP TABLE IF EXISTS bib86x; DROP TABLE IF EXISTS bib87x; DROP TABLE IF EXISTS bib88x; DROP TABLE IF EXISTS bib89x; DROP TABLE IF EXISTS bib90x; DROP TABLE IF EXISTS bib91x; DROP TABLE IF EXISTS bib92x; DROP TABLE IF EXISTS bib93x; DROP TABLE IF EXISTS bib94x; DROP TABLE IF EXISTS bib95x; DROP TABLE IF EXISTS bib96x; DROP TABLE IF EXISTS bib97x; DROP TABLE IF EXISTS bib98x; DROP TABLE IF EXISTS bib99x; DROP TABLE IF EXISTS bibrec_bib00x; DROP TABLE IF EXISTS bibrec_bib01x; DROP TABLE IF EXISTS bibrec_bib02x; DROP TABLE IF EXISTS bibrec_bib03x; DROP TABLE IF EXISTS bibrec_bib04x; DROP TABLE IF EXISTS bibrec_bib05x; DROP TABLE IF EXISTS bibrec_bib06x; DROP TABLE IF EXISTS bibrec_bib07x; DROP TABLE IF EXISTS bibrec_bib08x; DROP TABLE IF EXISTS bibrec_bib09x; DROP TABLE IF EXISTS bibrec_bib10x; DROP TABLE IF EXISTS bibrec_bib11x; DROP TABLE IF EXISTS bibrec_bib12x; DROP TABLE IF EXISTS bibrec_bib13x; DROP TABLE IF EXISTS bibrec_bib14x; DROP TABLE IF EXISTS bibrec_bib15x; DROP TABLE IF EXISTS bibrec_bib16x; DROP TABLE IF 
EXISTS bibrec_bib17x; DROP TABLE IF EXISTS bibrec_bib18x; DROP TABLE IF EXISTS bibrec_bib19x; DROP TABLE IF EXISTS bibrec_bib20x; DROP TABLE IF EXISTS bibrec_bib21x; DROP TABLE IF EXISTS bibrec_bib22x; DROP TABLE IF EXISTS bibrec_bib23x; DROP TABLE IF EXISTS bibrec_bib24x; DROP TABLE IF EXISTS bibrec_bib25x; DROP TABLE IF EXISTS bibrec_bib26x; DROP TABLE IF EXISTS bibrec_bib27x; DROP TABLE IF EXISTS bibrec_bib28x; DROP TABLE IF EXISTS bibrec_bib29x; DROP TABLE IF EXISTS bibrec_bib30x; DROP TABLE IF EXISTS bibrec_bib31x; DROP TABLE IF EXISTS bibrec_bib32x; DROP TABLE IF EXISTS bibrec_bib33x; DROP TABLE IF EXISTS bibrec_bib34x; DROP TABLE IF EXISTS bibrec_bib35x; DROP TABLE IF EXISTS bibrec_bib36x; DROP TABLE IF EXISTS bibrec_bib37x; DROP TABLE IF EXISTS bibrec_bib38x; DROP TABLE IF EXISTS bibrec_bib39x; DROP TABLE IF EXISTS bibrec_bib40x; DROP TABLE IF EXISTS bibrec_bib41x; DROP TABLE IF EXISTS bibrec_bib42x; DROP TABLE IF EXISTS bibrec_bib43x; DROP TABLE IF EXISTS bibrec_bib44x; DROP TABLE IF EXISTS bibrec_bib45x; DROP TABLE IF EXISTS bibrec_bib46x; DROP TABLE IF EXISTS bibrec_bib47x; DROP TABLE IF EXISTS bibrec_bib48x; DROP TABLE IF EXISTS bibrec_bib49x; DROP TABLE IF EXISTS bibrec_bib50x; DROP TABLE IF EXISTS bibrec_bib51x; DROP TABLE IF EXISTS bibrec_bib52x; DROP TABLE IF EXISTS bibrec_bib53x; DROP TABLE IF EXISTS bibrec_bib54x; DROP TABLE IF EXISTS bibrec_bib55x; DROP TABLE IF EXISTS bibrec_bib56x; DROP TABLE IF EXISTS bibrec_bib57x; DROP TABLE IF EXISTS bibrec_bib58x; DROP TABLE IF EXISTS bibrec_bib59x; DROP TABLE IF EXISTS bibrec_bib60x; DROP TABLE IF EXISTS bibrec_bib61x; DROP TABLE IF EXISTS bibrec_bib62x; DROP TABLE IF EXISTS bibrec_bib63x; DROP TABLE IF EXISTS bibrec_bib64x; DROP TABLE IF EXISTS bibrec_bib65x; DROP TABLE IF EXISTS bibrec_bib66x; DROP TABLE IF EXISTS bibrec_bib67x; DROP TABLE IF EXISTS bibrec_bib68x; DROP TABLE IF EXISTS bibrec_bib69x; DROP TABLE IF EXISTS bibrec_bib70x; DROP TABLE IF EXISTS bibrec_bib71x; DROP TABLE IF EXISTS bibrec_bib72x; DROP TABLE IF EXISTS bibrec_bib73x; DROP TABLE IF EXISTS bibrec_bib74x; DROP TABLE IF EXISTS bibrec_bib75x; DROP TABLE IF EXISTS bibrec_bib76x; DROP TABLE IF EXISTS bibrec_bib77x; DROP TABLE IF EXISTS bibrec_bib78x; DROP TABLE IF EXISTS bibrec_bib79x; DROP TABLE IF EXISTS bibrec_bib80x; DROP TABLE IF EXISTS bibrec_bib81x; DROP TABLE IF EXISTS bibrec_bib82x; DROP TABLE IF EXISTS bibrec_bib83x; DROP TABLE IF EXISTS bibrec_bib84x; DROP TABLE IF EXISTS bibrec_bib85x; DROP TABLE IF EXISTS bibrec_bib86x; DROP TABLE IF EXISTS bibrec_bib87x; DROP TABLE IF EXISTS bibrec_bib88x; DROP TABLE IF EXISTS bibrec_bib89x; DROP TABLE IF EXISTS bibrec_bib90x; DROP TABLE IF EXISTS bibrec_bib91x; DROP TABLE IF EXISTS bibrec_bib92x; DROP TABLE IF EXISTS bibrec_bib93x; DROP TABLE IF EXISTS bibrec_bib94x; DROP TABLE IF EXISTS bibrec_bib95x; DROP TABLE IF EXISTS bibrec_bib96x; DROP TABLE IF EXISTS bibrec_bib97x; DROP TABLE IF EXISTS bibrec_bib98x; DROP TABLE IF EXISTS bibrec_bib99x; DROP TABLE IF EXISTS bibfmt; DROP TABLE IF EXISTS idxINDEX; DROP TABLE IF EXISTS idxINDEXNAME; DROP TABLE IF EXISTS idxINDEX_field; DROP TABLE IF EXISTS idxWORD01F; DROP TABLE IF EXISTS idxWORD02F; DROP TABLE IF EXISTS idxWORD03F; DROP TABLE IF EXISTS idxWORD04F; DROP TABLE IF EXISTS idxWORD05F; DROP TABLE IF EXISTS idxWORD06F; DROP TABLE IF EXISTS idxWORD07F; DROP TABLE IF EXISTS idxWORD08F; DROP TABLE IF EXISTS idxWORD09F; DROP TABLE IF EXISTS idxWORD10F; DROP TABLE IF EXISTS idxWORD11F; DROP TABLE IF EXISTS idxWORD12F; DROP TABLE IF EXISTS idxWORD13F; DROP TABLE IF 
EXISTS idxWORD14F; DROP TABLE IF EXISTS idxWORD15F; DROP TABLE IF EXISTS idxWORD16F; DROP TABLE IF EXISTS idxWORD17F; DROP TABLE IF EXISTS idxWORD18F; DROP TABLE IF EXISTS idxWORD01R; DROP TABLE IF EXISTS idxWORD02R; DROP TABLE IF EXISTS idxWORD03R; DROP TABLE IF EXISTS idxWORD04R; DROP TABLE IF EXISTS idxWORD05R; DROP TABLE IF EXISTS idxWORD06R; DROP TABLE IF EXISTS idxWORD07R; DROP TABLE IF EXISTS idxWORD08R; DROP TABLE IF EXISTS idxWORD09R; DROP TABLE IF EXISTS idxWORD10R; DROP TABLE IF EXISTS idxWORD11R; DROP TABLE IF EXISTS idxWORD12R; DROP TABLE IF EXISTS idxWORD13R; DROP TABLE IF EXISTS idxWORD14R; DROP TABLE IF EXISTS idxWORD15R; DROP TABLE IF EXISTS idxWORD16R; DROP TABLE IF EXISTS idxWORD17R; DROP TABLE IF EXISTS idxWORD18R; DROP TABLE IF EXISTS idxPAIR01F; DROP TABLE IF EXISTS idxPAIR02F; DROP TABLE IF EXISTS idxPAIR03F; DROP TABLE IF EXISTS idxPAIR04F; DROP TABLE IF EXISTS idxPAIR05F; DROP TABLE IF EXISTS idxPAIR06F; DROP TABLE IF EXISTS idxPAIR07F; DROP TABLE IF EXISTS idxPAIR08F; DROP TABLE IF EXISTS idxPAIR09F; DROP TABLE IF EXISTS idxPAIR10F; DROP TABLE IF EXISTS idxPAIR11F; DROP TABLE IF EXISTS idxPAIR12F; DROP TABLE IF EXISTS idxPAIR13F; DROP TABLE IF EXISTS idxPAIR14F; DROP TABLE IF EXISTS idxPAIR15F; DROP TABLE IF EXISTS idxPAIR16F; DROP TABLE IF EXISTS idxPAIR17F; DROP TABLE IF EXISTS idxPAIR18F; DROP TABLE IF EXISTS idxPAIR01R; DROP TABLE IF EXISTS idxPAIR02R; DROP TABLE IF EXISTS idxPAIR03R; DROP TABLE IF EXISTS idxPAIR04R; DROP TABLE IF EXISTS idxPAIR05R; DROP TABLE IF EXISTS idxPAIR06R; DROP TABLE IF EXISTS idxPAIR07R; DROP TABLE IF EXISTS idxPAIR08R; DROP TABLE IF EXISTS idxPAIR09R; DROP TABLE IF EXISTS idxPAIR10R; DROP TABLE IF EXISTS idxPAIR11R; DROP TABLE IF EXISTS idxPAIR12R; DROP TABLE IF EXISTS idxPAIR13R; DROP TABLE IF EXISTS idxPAIR14R; DROP TABLE IF EXISTS idxPAIR15R; DROP TABLE IF EXISTS idxPAIR16R; DROP TABLE IF EXISTS idxPAIR17R; DROP TABLE IF EXISTS idxPAIR18R; DROP TABLE IF EXISTS idxPHRASE01F; DROP TABLE IF EXISTS idxPHRASE02F; DROP TABLE IF EXISTS idxPHRASE03F; DROP TABLE IF EXISTS idxPHRASE04F; DROP TABLE IF EXISTS idxPHRASE05F; DROP TABLE IF EXISTS idxPHRASE06F; DROP TABLE IF EXISTS idxPHRASE07F; DROP TABLE IF EXISTS idxPHRASE08F; DROP TABLE IF EXISTS idxPHRASE09F; DROP TABLE IF EXISTS idxPHRASE10F; DROP TABLE IF EXISTS idxPHRASE11F; DROP TABLE IF EXISTS idxPHRASE12F; DROP TABLE IF EXISTS idxPHRASE13F; DROP TABLE IF EXISTS idxPHRASE14F; DROP TABLE IF EXISTS idxPHRASE15F; DROP TABLE IF EXISTS idxPHRASE16F; DROP TABLE IF EXISTS idxPHRASE17F; DROP TABLE IF EXISTS idxPHRASE18F; DROP TABLE IF EXISTS idxPHRASE01R; DROP TABLE IF EXISTS idxPHRASE02R; DROP TABLE IF EXISTS idxPHRASE03R; DROP TABLE IF EXISTS idxPHRASE04R; DROP TABLE IF EXISTS idxPHRASE05R; DROP TABLE IF EXISTS idxPHRASE06R; DROP TABLE IF EXISTS idxPHRASE07R; DROP TABLE IF EXISTS idxPHRASE08R; DROP TABLE IF EXISTS idxPHRASE09R; DROP TABLE IF EXISTS idxPHRASE10R; DROP TABLE IF EXISTS idxPHRASE11R; DROP TABLE IF EXISTS idxPHRASE12R; DROP TABLE IF EXISTS idxPHRASE13R; DROP TABLE IF EXISTS idxPHRASE14R; DROP TABLE IF EXISTS idxPHRASE15R; DROP TABLE IF EXISTS idxPHRASE16R; DROP TABLE IF EXISTS idxPHRASE17R; DROP TABLE IF EXISTS idxPHRASE18R; DROP TABLE IF EXISTS rnkMETHOD; DROP TABLE IF EXISTS rnkMETHODNAME; DROP TABLE IF EXISTS rnkMETHODDATA; DROP TABLE IF EXISTS rnkWORD01F; DROP TABLE IF EXISTS rnkWORD01R; DROP TABLE IF EXISTS rnkPAGEVIEWS; DROP TABLE IF EXISTS rnkDOWNLOADS; DROP TABLE IF EXISTS rnkCITATIONDATA; DROP TABLE IF EXISTS rnkCITATIONDATAEXT; DROP TABLE IF EXISTS rnkAUTHORDATA; DROP 
TABLE IF EXISTS collection_rnkMETHOD; DROP TABLE IF EXISTS collection; DROP TABLE IF EXISTS collectionname; DROP TABLE IF EXISTS oaiREPOSITORY; DROP TABLE IF EXISTS oaiHARVEST; DROP TABLE IF EXISTS oaiHARVESTLOG; DROP TABLE IF EXISTS bibHOLDINGPEN; DROP TABLE IF EXISTS collection_collection; DROP TABLE IF EXISTS collection_portalbox; DROP TABLE IF EXISTS portalbox; DROP TABLE IF EXISTS collection_example; DROP TABLE IF EXISTS example; DROP TABLE IF EXISTS collection_format; DROP TABLE IF EXISTS format; DROP TABLE IF EXISTS formatname; DROP TABLE IF EXISTS collection_field_fieldvalue; DROP TABLE IF EXISTS field; DROP TABLE IF EXISTS fieldname; DROP TABLE IF EXISTS fieldvalue; DROP TABLE IF EXISTS field_tag; DROP TABLE IF EXISTS tag; DROP TABLE IF EXISTS publreq; DROP TABLE IF EXISTS session; DROP TABLE IF EXISTS user; DROP TABLE IF EXISTS accROLE; DROP TABLE IF EXISTS accMAILCOOKIE; DROP TABLE IF EXISTS user_accROLE; DROP TABLE IF EXISTS accACTION; DROP TABLE IF EXISTS accARGUMENT; DROP TABLE IF EXISTS accROLE_accACTION_accARGUMENT; DROP TABLE IF EXISTS user_query; DROP TABLE IF EXISTS query; DROP TABLE IF EXISTS user_basket; DROP TABLE IF EXISTS basket; DROP TABLE IF EXISTS basket_record; DROP TABLE IF EXISTS record; DROP TABLE IF EXISTS user_query_basket; DROP TABLE IF EXISTS cmtRECORDCOMMENT; DROP TABLE IF EXISTS knwKB; DROP TABLE IF EXISTS knwKBRVAL; DROP TABLE IF EXISTS knwKBDDEF; DROP TABLE IF EXISTS sbmACTION; DROP TABLE IF EXISTS sbmALLFUNCDESCR; DROP TABLE IF EXISTS sbmAPPROVAL; DROP TABLE IF EXISTS sbmCPLXAPPROVAL; DROP TABLE IF EXISTS sbmCOLLECTION; DROP TABLE IF EXISTS sbmCOLLECTION_sbmCOLLECTION; DROP TABLE IF EXISTS sbmCOLLECTION_sbmDOCTYPE; DROP TABLE IF EXISTS sbmCATEGORIES; DROP TABLE IF EXISTS sbmCHECKS; DROP TABLE IF EXISTS sbmCOOKIES; DROP TABLE IF EXISTS sbmDOCTYPE; DROP TABLE IF EXISTS sbmFIELD; DROP TABLE IF EXISTS sbmFIELDDESC; DROP TABLE IF EXISTS sbmFORMATEXTENSION; DROP TABLE IF EXISTS sbmFUNCTIONS; DROP TABLE IF EXISTS sbmFUNDESC; DROP TABLE IF EXISTS sbmGFILERESULT; DROP TABLE IF EXISTS sbmIMPLEMENT; DROP TABLE IF EXISTS sbmPARAMETERS; DROP TABLE IF EXISTS sbmPUBLICATION; DROP TABLE IF EXISTS sbmPUBLICATIONCOMM; DROP TABLE IF EXISTS sbmPUBLICATIONDATA; DROP TABLE IF EXISTS sbmREFEREES; DROP TABLE IF EXISTS sbmSUBMISSIONS; DROP TABLE IF EXISTS schTASK; DROP TABLE IF EXISTS bibdoc; DROP TABLE IF EXISTS bibdoc_bibdoc; DROP TABLE IF EXISTS bibrec_bibdoc; DROP TABLE IF EXISTS bibdocfsinfo; DROP TABLE IF EXISTS usergroup; DROP TABLE IF EXISTS user_usergroup; DROP TABLE IF EXISTS user_basket; DROP TABLE IF EXISTS msgMESSAGE; DROP TABLE IF EXISTS user_msgMESSAGE; DROP TABLE IF EXISTS bskBASKET; DROP TABLE IF EXISTS bskEXTREC; DROP TABLE IF EXISTS bskEXTFMT; DROP TABLE IF EXISTS bskREC; DROP TABLE IF EXISTS bskRECORDCOMMENT; DROP TABLE IF EXISTS cmtACTIONHISTORY; DROP TABLE IF EXISTS cmtSUBSCRIPTION; DROP TABLE IF EXISTS user_bskBASKET; DROP TABLE IF EXISTS usergroup_bskBASKET; DROP TABLE IF EXISTS collection_externalcollection; DROP TABLE IF EXISTS externalcollection; DROP TABLE IF EXISTS collectiondetailedrecordpagetabs; DROP TABLE IF EXISTS staEVENT; DROP TABLE IF EXISTS clsMETHOD; DROP TABLE IF EXISTS collection_clsMETHOD; DROP TABLE IF EXISTS jrnJOURNAL; DROP TABLE IF EXISTS jrnISSUE; DROP TABLE IF EXISTS hstRECORD; DROP TABLE IF EXISTS hstDOCUMENT; DROP TABLE IF EXISTS hstTASK; DROP TABLE IF EXISTS hstBATCHUPLOAD; DROP TABLE IF EXISTS crcBORROWER; DROP TABLE IF EXISTS crcILLREQUEST; DROP TABLE IF EXISTS crcITEM; DROP TABLE IF EXISTS crcLIBRARY; DROP TABLE IF EXISTS 
crcLOAN; DROP TABLE IF EXISTS crcLOANREQUEST; DROP TABLE IF EXISTS crcPURCHASE; DROP TABLE IF EXISTS crcVENDOR; DROP TABLE IF EXISTS expJOB; DROP TABLE IF EXISTS expQUERY; DROP TABLE IF EXISTS expJOB_expQUERY; DROP TABLE IF EXISTS expQUERYRESULT; DROP TABLE IF EXISTS expJOBRESULT; DROP TABLE IF EXISTS expJOBRESULT_expQUERYRESULT; DROP TABLE IF EXISTS user_expJOB; DROP TABLE IF EXISTS swrREMOTESERVER; DROP TABLE IF EXISTS swrCLIENTDATA; DROP TABLE IF EXISTS hstEXCEPTION; DROP TABLE IF EXISTS aidUSERINPUTLOG; DROP TABLE IF EXISTS aidCACHE; DROP TABLE IF EXISTS aidPERSONIDDATA; DROP TABLE IF EXISTS aidPERSONIDPAPERS; DROP TABLE IF EXISTS aidRESULTS; -DROP TABLE IF EXISTS aidPROBCACHE; DROP TABLE IF EXISTS xtrJOB; DROP TABLE IF EXISTS bsrMETHOD; DROP TABLE IF EXISTS bsrMETHODNAME; DROP TABLE IF EXISTS bsrMETHODDATA; DROP TABLE IF EXISTS bsrMETHODDATABUCKET; DROP TABLE IF EXISTS collection_bsrMETHOD; DROP TABLE IF EXISTS lnkENTRY; DROP TABLE IF EXISTS lnkENTRYURLTITLE; DROP TABLE IF EXISTS lnkENTRYLOG; DROP TABLE IF EXISTS lnkLOG; DROP TABLE IF EXISTS lnkADMINURL; DROP TABLE IF EXISTS lnkADMINURLLOG; +DROP TABLE IF EXISTS webapikey; -- end of file diff --git a/modules/webaccess/doc/admin/webaccess-admin-guide.webdoc b/modules/webaccess/doc/admin/webaccess-admin-guide.webdoc index b67bf559c..61ca08d9b 100644 --- a/modules/webaccess/doc/admin/webaccess-admin-guide.webdoc +++ b/modules/webaccess/doc/admin/webaccess-admin-guide.webdoc @@ -1,1136 +1,1144 @@ ## -*- mode: html; coding: utf-8; -*- ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

Contents

1. Introduction, using roles
2. WebAccess admin interface
3. Example pages, illustrating snapshots
4. Managing accounts / Access policy
5. Managing login methods
6. Firewall-like role definitions

1. INTRODUCTION, USING ROLES

   WebAccess is a common RBAC (role-based access control) system for all
   of Invenio. Users are connected to roles that cover different areas
   of access, e.g. administrator of the photo collection or system
   librarian. Users can be active in different areas and can of course
   be connected to as many roles as needed.
 
   The roles are connected to actions. An action identifies a task you
   can perform in Invenio. It can be defined to take any number of
   arguments in order to more clearly describe what you are allowing
   connected users to do.
 
   For example, the system librarian can be allowed to run BibIndex on
   the different indexes. To allow system librarians to run BibIndex
   indexing on the field author, we connect the role system librarian
   with the action runbibindex, using the argument index='author'.
 
   Additionally, roles could have firewall-like role
   definitions. A definition is a formal description of which
   users are entitled to belong to the role. So you have two ways for
   connecting users to roles. Either linking explicitly a user with the
   role or describing the characteristics that makes users belong to
   the role.
 
   WebAccess is based on allowing users to perform actions. This means
   that only allowed actions are stored in the access control engine's
   database.
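
   For developers, checking such an authorization boils down to a single
   call to the access control engine. A minimal sketch (assuming a
   mod_python request object `req` is at hand, as in a typical Invenio
   web handler):

      from invenio.access_control_engine import acc_authorize_action

      # auth_code is 0 when the current user is authorized
      (auth_code, auth_msg) = acc_authorize_action(req, 'runbibindex')
      if auth_code != 0:
          return auth_msg  # explains why the authorization was denied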
 

2. WEBACCESS ADMIN INTERFACE

 All the WebAccess Administration web pages have certain
 features/design choices in common:
 
 - Divided into steps
 
   The process of adding new authorizations/information is
   stepwise. The subtitle contains information about which step you are
   on and what you are supposed to do.
 
 - Restart from any wanted step
 
   You can always restart from an earlier step by simply clicking the
   corresponding button. This is not a way to undo changes! No information
   about the previous database state is kept, so all changes are final.
 
 - Changes and new entries must be confirmed
 
   On all the pages you will be asked to confirm the change, with
   information about what kind of change you are about to perform.
 
 - Links to other relevant admin areas on the right side
 
   To make it easier to perform your administration tasks, we have
   added a menu area on the right hand side of these pages. The menu
   contains links to other relevant admin pages and changes according to
   the page you are on and the information you have selected.
 

3. EXAMPLE PAGES

 I. Role area
 II. Example - connecting role and user
 
 
 I. Role area
 
   Administration tasks start in one of the administration areas. The
   role area is the main area, from which you can perform all your
   management tasks. The other admin areas are just alternative ways of
   entering.
 
 

Role Administration

administration with roles as access point
Users:
add or remove users from the access to a role and its privileges.
Authorizations/Actions:
these terms mean almost the same, but an authorization is a
connection between a role and an action (possibly) containing arguments.
Roles:
see all the information attached to a role and decide if you want to
delete it.
id name description definition users authorizations / actions role
2 photoadmin Photo collection administrator None add / delete add / modify / remove modify / delete show details
1 superadmin superuser with all rights allow email /.*@cern.ch/ add / delete add / modify / remove modify / delete show details
3 webaccessadmin WebAccess administrator allow nickname 'jekyll' add / delete add / modify / remove modify / delete show details
Create new role
go here to add a new role.
Create new action
go here to add a new action.
 
 II. Example - connecting role and user
 
   One of the important tasks that can be handled via the WebAccess Admin Web Interface
   is the delegation of access rights to users. This is done by connecting them to the
   different roles offered.

   The task is divided into five simple and comprehensible steps. Below follow the pages
   from the different steps, with comments on the ongoing procedure.
 
 - step 1 - select a role
 
   You must first select the role you want to connect users to. All the available roles are
   listed alphabetically in a select box. Just find the wanted role and select it. Then click on
   the button saying "select role".
 
   If you start from the Role Area, this step is already done, and you start directly on step 2.
 
 

Connect user to role

step 1 - select a role
1. select role
Create new role
go here to add a new role.
 
 - step 2 - search for users
 
   As you can see, the subtitle of the page has now changed. The subtitle always tells you
   which step you are on and what your current task is.
 
   There may be thousands of users of your online library, so it is important
   to narrow down the user you are looking for. Give part of, or the entire,
   search string, and all users with partially matching e-mails will be listed in the next step.
 
   You can also see that the right hand menu has changed. This area is always updated with links
   to related admin areas.
 
 

Connect user to role

step 2 - search for users
1. select role
2. search pattern
Create new role
go here to add a new role.
Remove users
remove users from role superadmin.
Connected users
show all users connected to role superadmin.
Add authorization
start adding new authorizations to role superadmin.
 
 - step 3 - select a user
 
   The select box contains all users with partly matching e-mail addresses. Select the one
   you want to connect to the role and continue.
 
   Notice the navigation trail that tells you where on the Administrator pages you are
   currently working.
 
 

Connect user to role

step 3 - select a user
1. select role
2. search pattern
3. select user
Create new role
go here to add a new role.
Remove users
remove users from role superadmin.
Connected users
show all users connected to role superadmin.
Add authorization
start adding new authorizations to role superadmin.
 
 - step 4 - confirm to add user
 
   All WebAccess Administrator web pages display the action you are about to perform,
   explaining what kind of addition, change or update will be done to your access control
   data.
 
   If you are happy with your decision, simply confirm it.
 
 

Connect user to role

step 4 - confirm to add user
1. select role
2. search pattern
3. select user
add user mikael.vik@cern.ch to role superadmin?
Create new role
go here to add a new role.
Remove users
remove users from role superadmin.
Connected users
show all users connected to role superadmin.
Add authorization
start adding new authorizations to role superadmin.
 
 - step 5 - confirm user added
 
   The user has now been added to this role. You can easily continue adding more users to this
   role by restarting from step 2 or 3. You can also go directly to another area and keep working
   on the same role.
 
 

Connect user to role

step 5 - confirm user added
1. select role
2. search pattern
3. select user
add user mikael.vik@cern.ch to role superadmin?

confirm: user mikael.vik@cern.ch added to role superadmin.

Create new role
go here to add a new role.
Remove users
remove users from role superadmin.
Connected users
show all users connected to role superadmin.
Add authorization
start adding new authorizations to role superadmin.
 
 - we are done
 
   This example is very similar to all the other pages where you administer WebAccess. The pages
   are an easy gateway to maintaining access control rights and share a lot of features:
   - divided into steps
   - restart from any wanted step (not undo)
   - changes must be confirmed
   - link to other relevant areas
   - prevent unwanted input
 
   As an administrator with access to these pages you are free to manage the rights any way you want.
 

4. MANAGING ACCOUNTS AND ACCESS POLICY

   Here you can administer the accounts and the access policy for your Invenio installation.
 
   - Access policy:
 
     To change the access policy, the general config file (or
     access_control_config.py) must be edited manually in a text
     editor. There the site can be defined as open or closed, you can
     set the access policy level for guest accounts and registered
     accounts, and you can decide when to notify the owner of an
     account about something happening to it, i.e. when it is created,
     deleted or approved.  The Apache server must be restarted after
     modifying these settings.
 
     The two levels for guest accounts are:
        0 - Allow guest accounts
        1 - Do not allow guest accounts
     The five levels for normal accounts are:
        0 - Allow users to create accounts; new accounts are automatically activated
        1 - Allow users to create accounts; an administrator must activate them
        2 - Only administrators can create accounts; users cannot edit the email address
        3 - Users cannot register or update account information (email/password)
        4 - Users cannot change the default login method
     You can configure Invenio to send an email:
        1. To an admin email-address when an account is created
        2. To the owner of an account when it is created
        3. To the owner of an account when it is activated
        4. To the owner of an account when it is deleted
 
     Define how open the site is:
       0 = normal operation of the site
       1 = read-only site, all write operations temporarily closed
       2 = site fully closed
       3 = database connections disabled
       CFG_ACCESS_CONTROL_LEVEL_SITE = 0
     Access policy for guests:
       0 = Allow guests to search
       1 = Guests cannot search (all users must log in)
       CFG_ACCESS_CONTROL_LEVEL_GUESTS = 0
     Access policy for accounts:
       0 = Users can register; accounts are automatically activated
       1 = Users can register, but admin must activate the accounts
       2 = Users cannot register or change email address, only admin can register accounts.
       3 = Users cannot register or update email address or password, only admin can register accounts.
       4 = Same as 3, but user cannot change login method.
       CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 0
 
     Limit the email address domains that may be used when registering a new account (example: cern.ch):
       CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN = ""
 
     Send an email when a new account is created by a user:
       CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 0
 
     Send an email to the user notifying when the account is created:
       CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 0
 
     Send an email to the user notifying when the account is activated:
       CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 0
 
     Send an email to the user notifying when the account is deleted/rejected:
       CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION = 0
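
     For example, a sketch of the corresponding settings for a site where
     the administrator must approve every new account and the relevant
     notification emails are sent out (adapt the values to your own policy):

       CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS = 1
       CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS = 1
       CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT = 1
       CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION = 1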
 
   - Account overview:
     Here you find an overview of the number of guest accounts, registered accounts and accounts
     awaiting activation, with a link to the activation page.
 
   - Create account:
     For creating new accounts, the email address must be unique. If configured to do so, an email
     will be sent to the given address when an account is created.
 
   - Edit accounts:
     For activating or rejecting accounts, in addition to modifying them. An activated account can be
     inactivated for a short period of time, but this will not warn the account owner. To find accounts,
     enter a part of the email address of the account and then search. This may take some time. If there
     are more than the selected number of accounts per page, you can use the next/prev links to switch
     pages. The accounts searched can also be limited to only activated or only not yet activated accounts.
 
   - Edit account:
     When editing an account, you can change the email address or password, delete the account, or modify
     the baskets or alerts belonging to it. You can also select which login method should be the default
     for the account. To modify baskets or alerts, you need to log in as the user and
     modify the desired data as a normal user. Remember to log out as the user when you are finished
     editing.
+    Here you can also edit the user's REST API keys by changing their status to the desired one. The
+    available statuses are specified by CFG_WEB_API_KEY_STATUS (only the first three are mandatory):
+      0 = OK
+      1 = REMOVED
+      2 = REVOKED
+      3 = WARNING1
+      4 = WARNING2
+      5 = WARNING3
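+
+      These statuses are defined by the CFG_WEB_API_KEY_STATUS dictionary in
+      access_control_config.py. A sketch of its shape (the values are mere
+      display labels; only the OK, REMOVED and REVOKED keys must be present,
+      further warning statuses can be added as extra keys):
+
+        CFG_WEB_API_KEY_STATUS = {
+            'OK': 'OK',
+            'REMOVED': 'REMOVED',
+            'REVOKED': 'REVOKED',
+            'WARNING': 'WARNING',
+        }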
 
 

5. MANAGING LOGIN METHODS

    Invenio supports using external login systems to authenticate users.
 
    When a user wants to log in, the username and password given by the user are checked against the selected
    system. If the user is authenticated by the external system, a valid email address is returned to
    Invenio and used to recognize the user within Invenio.
 
    If a new user tries to log in via an external login system without having an account, an account
    is automatically created in Invenio to recognize the user and store the user's settings. The password
    for the local account is randomly generated.
 
    If you want users to be unable to change the login method and account username / password, forcing the
    use of certain external systems, set CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS to 4 as mentioned in the previous section.
 
    If a user changes login method from an external one to the internal one, they also need to either change the
    password before logging out, or set the password via the lost-password email service.
 
    If you are using CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS with a value greater than 1, note
    that, even if the first login of a user through an external authentication system technically means
    registering the user into the system, this is not the behaviour the user expects. The user is already
    registered at an authority that we trust, so there is no need to prevent the user from being imported
    into the system. That is why, for external authentication, CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS is not
    taken into account beyond what is said above.
 
    If an external login system is used, you may want to protect the user's username / password using HTTPS.
 
    To add a new system, two changes must be made (for the time being):
    - The name of the method, whether or not it is the default, and an instance of the class must be added to the variable
      CFG_EXTERNAL_AUTHENTICATION in access_control_config.py.
 
    - A class derived from the class ExternalAuth inside the file
      external_authentication.py must be created. This class must include at least the
      method auth_user. This method returns a valid email address in Invenio if the user
      is authenticated (not necessarily the same as the username entered by the user). If the user
      is not authenticated, it returns None.
      The class could also provide five more methods: fetch_user_preferences, user_exists,
      fetch_user_groups_membership, fetch_all_users_groups_membership and fetch_user_nickname.
      The first should take an email and optionally a password, and should return a dictionary of keys
      and values representing external preferences, infos or settings. If, for some reason, you want
      to hide a particular field, you should export the related key
      prefixed by "HIDDEN_". Such fields won't be displayed in tables and pages regarding external
      settings.
      The second method should check through the external system whether a particular email exists. If you
      provide such a method, then a user will be able to switch from and to this authentication method.
      The third method should take an email and (if necessary) a password,
      and should return a dictionary of external group names, together with their descriptions, of which
      the user is a member. Those groups will be merged into the groups system.
      The user will be a member of those groups and will be able to use them in any place
      where groups are useful, but won't be able to unsubscribe from or administer them.
      The fourth method should just return a dictionary with external groups as keys and tuples containing
      a group description and a list of emails of the users belonging to each group. Those memberships
      will be merged into the database in the same way as done by the previous method, but this can
      provide batch synchronization of groups.
      The fifth method should just return the nickname as known by the external authentication
      system, given the usual email/username and the password.
      Note: if your system has more than one external login method, then inconsistencies in the group
      memberships can happen when a user switches login method. This will be fixed some time in the
      future.
      If you add the attribute enforce_external_nicknames to your class and set it to True, this will force
      the system to import external nicknames whenever the user logs in with the external login method for the
      first time. Since a nickname is not changeable, it will then stay fixed forever. If this nickname is
      already registered in the system (suppose it is linked with a local account), then it will not be
      imported. If this attribute doesn't exist or is set to False, then no nickname will be
      imported and the user will be free to choose a nickname later (which will then again
      stay forever).
      Note: every method will receive as its last parameter the mod_python request object, which can
      be used for particular purposes.
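
      As for the first change, registering the new method is just a new
      entry in the CFG_EXTERNAL_AUTHENTICATION dictionary. A minimal
      sketch, assuming the ExternalAuthFoo example class shown in the
      template below:

      # in access_control_config.py
      CFG_EXTERNAL_AUTH_DEFAULT = 'Local'
      CFG_EXTERNAL_AUTHENTICATION = {
          "Local": None,               # built-in Invenio authentication
          "Foo": ExternalAuthFoo(),    # the new external login method
      }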
 
 
      Example template:
      from invenio.external_authentication import ExternalAuth, InvenioWebAccessExternalAuthError
 
      class ExternalAuthFoo(ExternalAuth):
          """External authentication template example."""
 
          def __init__(self):
              """Initialize stuff here."""
              self.name = None
              self.enforce_external_nicknames = False
              pass
 
          def auth_user(self, username, password, req=None):
              """Authenticate user-supplied USERNAME and PASSWORD.
              Return None if authentication failed, or the email address of the
              person if the authentication was successful.  In order to do
              this you may perhaps have to keep a translation table between
              usernames and email addresses.
              Raise InvenioWebAccessExternalAuthError in case of external troubles.
              """
              raise NotImplementedError
              #return None
 
          def user_exists(self, email, req=None):
              """Checks against external_authentication for existance of email.
              @return True if the user exists, False otherwise
              """
              raise NotImplementedError
 
          def fetch_user_groups_membership(self, username, password=None, req=None):
              """Given a username, returns a dictionary of groups
              and their description to which the user is subscribed.
              Raise InvenioWebAccessExternalAuthError in case of troubles.
              """
              raise NotImplementedError
              #return {}
 
          def fetch_user_preferences(self, username, password=None, req=None):
              """Given a username and a password, returns a dictionary of keys and
              values, corresponding to external infos and settings.
 
              userprefs = {"telephone": "2392489",
              "address": "10th Downing Street"}
              """
              raise NotImplementedError
              #return {}
 
          def fetch_all_users_groups_membership(self, req=None):
              """Fetch all the groups with a description, and users who belong to
              each groups.
              @return {'mygroup': ('description', ['email1', 'email2', ...]), ...}
              """
              raise NotImplementedError
 
          def fetch_user_nickname(self, username, password, req=None):
              """Given a username and a password, returns the right nickname belonging
              to that user (username could be an email).
              """
              raise NotImplementedError
              #return Nickname
 

6. FIREWALL-LIKE ROLE DEFINITIONS

The FireRole language description
     In the WebAccess RBAC system, roles are built up from their name,
     description and definition.

     A definition is the way to formally (yet implicitly) define which users
     belong to which roles.

     A definition is expressed in a firewall-like rules language. It's built
     up of rows which are matched from top to bottom, in order to decide
     whether the current user (whether he/she is logged in or not) may belong
     to a role.
 
     Any row has this syntax:
 
         ALLOW/DENY ANY/ALL
 
         ALLOW/DENY FROM/UNTIL "YYYY-MM-DD"
 
         or
 
         ALLOW/DENY [NOT] field {one or more values}
 
     The rows are parsed from top to bottom. If a row matches the user, then the
     user belongs to the role if the rule is an ALLOW rule; otherwise, if the
     rule is a DENY one, the user doesn't belong to the role.
 
     A rule of the kind ALLOW|DENY ANY always matches, regardless of the user.
 
     Note that in place of ANY you can use the word ALL. The semantics are the
     same; the system supports both to let rules comply with English grammar.
 
     The second type of rule is interpreted as follows: given a
     (double-)quoted date in the form "YYYY-MM-DD", the rule is matched if,
     when using FROM, the current date is identical to or later than the
     given date, or if, when using UNTIL, the current date is identical to
     or earlier than the given date. If the rule starts with ALLOW and is
     matched, then the next row is evaluated. If it is not matched, then the
     whole FireRole will evaluate into a DENY ALL. If the rule starts with
     DENY and is matched, then the whole FireRole will evaluate into a DENY
     ALL. If it is not matched, then the next row is evaluated.
 
     The third type of rule is interpreted as follows: given a dictionary
     of keys:values describing a user (we will cover this below), the rule
     considers the value associated with the key named in field, and checks
     whether it corresponds to at least one of the values in the "one or
     more values" list. This is a list of comma-separated strings, which can
     be literal (double-)quoted strings or regexps (marked by `/' ... `/'
     signs). If at least one value matches (literally or through the regexp
     language), the whole rule is considered to match.
     If the optional NOT keyword is specified, the logic is inverted: if at
     least one value of the rule matches, the rule is skipped; if none of
     the values match, the whole rule matches.
 
     A DENY ALL rule is implicitly added at the end of every definition. Note
     that this implies that, if you are using e.g. a temporal rule
     (FROM/UNTIL), you should explicitly add an additional row with the value
     ALLOW ANY if you actually want to allow users in the specified timeframe.
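
     For example, a sketch of a definition that lets anybody belong to the
     role, but only during the year 2012 (note the final ALLOW ANY row):

         allow from "2012-01-01"
         allow until "2012-12-31"
         allow any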
 
     Any field is valid, but only rules concerning fields which currently
     exist in the user-describing dictionary are checked. All the rules
     with non-existent fields are skipped.
 
     The user-describing dictionary (user_info) is built at runtime with all
     the information that can be gathered about the current user (and his/her session).
     Currently valid fields are: uid, email, nickname, remote_ip,
     remote_host, groups and all the external settings provided
     by the external authentication systems (e.g. CERN SSO provides:
     external_authmethod, external_building, external_department, external_email,
     external_external, external_firstname, external_fullname, external_homdir,
     external_homeinstitute, external_lastname, external_login, external_mobilenumber,
     external_phonenumber).
 
     Among those fields there are some special cases: remote_ip and
     (apache_)groups. Rules can refer to remote_ip either using a literal
     expression specifying a list of single IPs, or a usual regexp (or list
     of regexps), or, also, using the common network/mask notation
     (e.g. "127.0.0.0/24") as a literal string, which is a mix between literal
     expressions and regexps. (apache_)groups are related to group memberships.
     Since a user will probably belong to more than one group, the rule
     matches if there is at least one group to which the user belongs that matches
     at least one of the expressions (NOT rules behave as you would imagine).
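
     For instance, a sketch of a remote_ip rule mixing the notations (a
     literal IP, a network/mask literal and a regexp):

         allow remote_ip "127.0.0.1","192.168.0.0/16",/10\.0\..*/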
 
     The dictionary is built using the current user session. If the user is
     authenticated in some way (Apache, locally, externally, SSO...) then more
     information can be provided to the FireRole system in order to decide
     whether the user should belong to a role or not.
 
     The default fields that are always there are:
     
  • uid: an integer representing the user id
  • nickname: the nickname of the user
  • email: the email of the user
  • group/groups: the local or external groups to which the user belongs
  • guest: 1 if the user is a guest (not logged in), 0 otherwise
plus all the external settings retrieved by an external authentication system. If the action for which the role is defined is triggered from the web interface of Invenio, then you will also have these additional fields:
  • remote_ip: the remote ip address of the user who is browsing
  • remote_host: the remote hostname of the user who is browsing
  • referer: the web page the user is coming from
  • uri: the uri the user is visiting
  • agent: the agent string describing the user's browser
Note that you can specify either (apache_)group or (apache_)groups (with or
without the trailing s). They are semantically equal and both are supported
just to let rules comply with English grammar. Every rule is case-insensitive
(apart from values, which must match literally, and regexp values which don't
explicitly specify case-insensitive matching). Every rule may contain
comments preceded by the '#' character. Any comment is discarded.

When you set a definition for a role, it is actually compiled and stored in a
binary compressed form inside the database. If the syntax isn't correct, this
will be reported and the definition won't be set or updated.

Example of a role definition:

    allow not email /.*@gmail.com/,/.*@hotmail.com/
    deny group badguys
    allow remote_ip "127.0.0.0/24"
    deny all

This definition would match all users whose emails don't end with @gmail.com
or @hotmail.com, or who don't belong to the group badguys and have a
remote_ip in the 24-bit mask network of 127.0.0.0. All the other users don't
belong to the role being defined.

If you want to discover which keys are available on your system for building
a FireRole rule, just log in with your account on your installation and visit
your account page with the verbose=9 URL parameter activated. Under the title
you will see the available keys and values that you can use to build a
FireRole rule. All fields are usable except those prefixed with precached_.
diff --git a/modules/webaccess/lib/access_control_config.py b/modules/webaccess/lib/access_control_config.py index 93a2e36f7..44efd2cdc 100644 --- a/modules/webaccess/lib/access_control_config.py +++ b/modules/webaccess/lib/access_control_config.py @@ -1,363 +1,378 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Invenio Access Control Config. """ __revision__ = \ "$Id$" # pylint: disable=C0301 from invenio.config import CFG_SITE_NAME, CFG_SITE_URL, CFG_SITE_LANG, \ CFG_SITE_SECURE_URL, CFG_SITE_SUPPORT_EMAIL, CFG_CERN_SITE, \ CFG_OPENAIRE_SITE, CFG_SITE_RECORD, CFG_INSPIRE_SITE from invenio.messages import gettext_set_language class InvenioWebAccessFireroleError(Exception): """Just an Exception to discover if it's a FireRole problem""" pass # VALUES TO BE EXPORTED # CURRENTLY USED BY THE FILES access_control_engine.py access_control_admin.py webaccessadmin_lib.py # name of the role giving superadmin rights SUPERADMINROLE = 'superadmin' # name of the webaccess webadmin role WEBACCESSADMINROLE = 'webaccessadmin' # name of the action allowing roles to access the web administrator interface WEBACCESSACTION = 'cfgwebaccess' # name of the action allowing roles to access the web administrator interface VIEWRESTRCOLL = 'viewrestrcoll' # name of the action allowing roles to delegate the rights to other roles # ex: libraryadmin to delegate libraryworker DELEGATEADDUSERROLE = 'accdelegaterole' # max number of users to display in the drop down selects MAXSELECTUSERS = 25 # max number of users to display in a page (mainly for user area) MAXPAGEUSERS = 25 # default role definition, source: CFG_ACC_EMPTY_ROLE_DEFINITION_SRC = 'deny all' # default role definition, compiled: CFG_ACC_EMPTY_ROLE_DEFINITION_OBJ = (False, ()) # default role definition, compiled and serialized: CFG_ACC_EMPTY_ROLE_DEFINITION_SER = None # List of tags containing (multiple) emails of users who should authorize # to access the corresponding record regardless of collection restrictions. if CFG_CERN_SITE: CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS = ['859__f', '270__m', '506__m'] else: CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS = ['8560_f'] # Use external source for access control? # CFG_EXTERNAL_AUTHENTICATION -- this is a dictionary with the enabled login method. # The key is the name of the login method and the value is an instance of # of the login method (see /help/admin/webaccess-admin-guide#5). Set the value # to None if you wish to use the local Invenio authentication method. # CFG_EXTERNAL_AUTH_DEFAULT -- set this to the key in CFG_EXTERNAL_AUTHENTICATION # that should be considered as default login method # CFG_EXTERNAL_AUTH_USING_SSO -- set this to the login method name of an SSO # login method, if any, otherwise set this to None. 
# CFG_EXTERNAL_AUTH_LOGOUT_SSO -- if CFG_EXTERNAL_AUTH_USING_SSO was not None # set this to the URL that should be contacted to perform an SSO logout from invenio.external_authentication_robot import ExternalAuthRobot if CFG_CERN_SITE: import external_authentication_sso as ea_sso CFG_EXTERNAL_AUTH_USING_SSO = "CERN" CFG_EXTERNAL_AUTH_DEFAULT = CFG_EXTERNAL_AUTH_USING_SSO CFG_EXTERNAL_AUTH_LOGOUT_SSO = 'https://login.cern.ch/adfs/ls/?wa=wsignout1.0' CFG_EXTERNAL_AUTHENTICATION = { CFG_EXTERNAL_AUTH_USING_SSO : ea_sso.ExternalAuthSSO(), } elif CFG_OPENAIRE_SITE: CFG_EXTERNAL_AUTH_DEFAULT = 'Local' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "Local": None, - "OpenAIRE": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False), - "ZOpenAIRE": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True) + "OpenAIRE": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False, external_id_attribute_name="id"), + } +elif CFG_INSPIRE_SITE: + CFG_EXTERNAL_AUTH_DEFAULT = 'arXiv' + CFG_EXTERNAL_AUTH_USING_SSO = False + CFG_EXTERNAL_AUTH_LOGOUT_SSO = None + CFG_EXTERNAL_AUTHENTICATION = { + "arXiv": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True) } elif CFG_INSPIRE_SITE: # INSPIRE specific robot configuration CFG_EXTERNAL_AUTH_DEFAULT = 'Local' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "Local": None, "Robot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False, check_user_ip=2), "ZRobot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True, check_user_ip=2) } else: CFG_EXTERNAL_AUTH_DEFAULT = 'Local' CFG_EXTERNAL_AUTH_USING_SSO = False CFG_EXTERNAL_AUTH_LOGOUT_SSO = None CFG_EXTERNAL_AUTHENTICATION = { "Local": None, "Robot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=False), "ZRobot": ExternalAuthRobot(enforce_external_nicknames=True, use_zlib=True) } ## If using SSO, this is the number of seconds after which the keep-alive ## SSO handler is pinged again to provide fresh SSO information. CFG_EXTERNAL_AUTH_SSO_REFRESH = 600 # default data for the add_default_settings function # Note: by default the definition is set to deny any. This won't be a problem # because userid directly connected with roles will still be allowed. 
# roles # name description definition DEF_ROLES = ((SUPERADMINROLE, 'superuser with all rights', 'deny any'), (WEBACCESSADMINROLE, 'WebAccess administrator', 'deny any'), ('anyuser', 'Any user', 'allow any'), ('basketusers', 'Users who can use baskets', 'allow any'), ('loanusers', 'Users who can use loans', 'allow any'), ('groupusers', 'Users who can use groups', 'allow any'), ('alertusers', 'Users who can use alerts', 'allow any'), ('messageusers', 'Users who can use messages', 'allow any'), ('holdingsusers', 'Users who can view holdings', 'allow any'), ('statisticsusers', 'Users who can view statistics', 'allow any'), ('claimpaperusers', 'Users who can perform changes to their own paper attributions without the need for an operator\'s approval', 'allow any'), ('claimpaperoperators', 'Users who can perform changes to _all_ paper attributions without the need for an operator\'s approval', 'deny any'), ('paperclaimviewers', 'Users who can view "claim my paper" facilities.', 'allow all'), ('paperattributionviewers', 'Users who can view "attribute this paper" facilities', 'allow all'), ('paperattributionlinkviewers', 'Users who can see attribution links in the search', 'allow all'), ) # Demo site roles DEF_DEMO_ROLES = (('photocurator', 'Photo collection curator', 'deny any'), ('thesesviewer', 'Theses viewer', 'allow group "Theses viewers"'), ('thesescurator', 'Theses collection curator', 'deny any'), ('swordcurator', 'BibSword client curator', 'deny any'), ('referee_DEMOBOO_*', 'Book collection curator', 'deny any'), ('restrictedpicturesviewer', 'Restricted pictures viewer', 'deny any'), ('curator', 'Curator', 'deny any'), ('basketusers', 'Users who can use baskets', 'deny email "hyde@cds.cern.ch"\nallow any'), ('claimpaperusers', 'Users who can perform changes to their own paper attributions without the need for an operator\'s approval', 'deny email "hyde@cds.cern.ch"\nallow any'), ('submit_DEMOJRN_*', 'Users who can submit (and modify) "Atlantis Times" articles', 'deny all'), ('atlantiseditor', 'Users who can configure "Atlantis Times" journal', 'deny all'), ('commentmoderator', 'Users who can moderate comments', 'deny all'), ('poetrycommentreader', 'Users who can view comments in Poetry collection', 'deny all')) DEF_DEMO_USER_ROLES = (('jekyll@cds.cern.ch', 'thesesviewer'), ('jekyll@cds.cern.ch', 'swordcurator'), ('jekyll@cds.cern.ch', 'claimpaperusers'), ('dorian.gray@cds.cern.ch', 'referee_DEMOBOO_*'), ('balthasar.montague@cds.cern.ch', 'curator'), ('romeo.montague@cds.cern.ch', 'restrictedpicturesviewer'), ('romeo.montague@cds.cern.ch', 'swordcurator'), ('romeo.montague@cds.cern.ch', 'thesescurator'), ('juliet.capulet@cds.cern.ch', 'restrictedpicturesviewer'), ('juliet.capulet@cds.cern.ch', 'photocurator'), ('romeo.montague@cds.cern.ch', 'submit_DEMOJRN_*'), ('juliet.capulet@cds.cern.ch', 'submit_DEMOJRN_*'), ('balthasar.montague@cds.cern.ch', 'atlantiseditor'), ('romeo.montague@cds.cern.ch', 'poetrycommentreader')) # users # list of e-mail addresses DEF_USERS = [] # actions # name desc allowedkeywords optional DEF_ACTIONS = ( ('cfgwebsearch', 'configure WebSearch', '', 'no'), ('cfgbibformat', 'configure BibFormat', '', 'no'), ('cfgbibknowledge', 'configure BibKnowledge', '', 'no'), ('cfgwebsubmit', 'configure WebSubmit', '', 'no'), ('cfgbibrank', 'configure BibRank', '', 'no'), ('cfgwebcomment', 'configure WebComment', '', 'no'), ('cfgweblinkback', 'configure WebLinkback' , '', 'no'), ('cfgoaiharvest', 'configure OAI Harvest', '', 'no'), ('cfgoairepository', 'configure OAI 
Repository', '', 'no'), ('cfgbibindex', 'configure BibIndex', '', 'no'), ('cfgbibexport', 'configure BibExport', '', 'no'), ('cfgrobotkeys', 'configure Robot keys', 'login_method,robot', 'yes'), ('cfgbibsort', 'configure BibSort', '', 'no'), ('runbibindex', 'run BibIndex', '', 'no'), ('runbibupload', 'run BibUpload', '', 'no'), ('runwebcoll', 'run webcoll', 'collection', 'yes'), ('runbibformat', 'run BibFormat', 'format', 'yes'), ('runbibclassify', 'run BibClassify', 'taxonomy', 'yes'), ('runbibtaskex', 'run BibTaskEx example', '', 'no'), ('runbibrank', 'run BibRank', '', 'no'), ('runoaiharvest', 'run oaiharvest task', '', 'no'), ('runoairepository', 'run oairepositoryupdater task', '', 'no'), ('runbibedit', 'run Record Editor', 'collection', 'yes'), ('runbibeditmulti', 'run Multi-Record Editor', '', 'no'), ('runbibdocfile', 'run Document File Manager', '', 'no'), ('runbibmerge', 'run Record Merger', '', 'no'), ('runbibswordclient', 'run BibSword client', '', 'no'), ('runwebstatadmin', 'run WebStadAdmin', '', 'no'), ('runinveniogc', 'run InvenioGC', '', 'no'), ('runbibexport', 'run BibExport', '', 'no'), ('referee', 'referee document type doctype/category categ', 'doctype,categ', 'yes'), ('submit', 'use webSubmit', 'doctype,act,categ', 'yes'), ('viewrestrdoc', 'view restricted document', 'status', 'no'), ('viewrestrcomment', 'view restricted comment', 'status', 'no'), (WEBACCESSACTION, 'configure WebAccess', '', 'no'), (DELEGATEADDUSERROLE, 'delegate subroles inside WebAccess', 'role', 'no'), (VIEWRESTRCOLL, 'view restricted collection', 'collection', 'no'), ('cfgwebjournal', 'configure WebJournal', 'name,with_editor_rights', 'no'), ('viewcomment', 'view comments', 'collection', 'no'), ('viewlinkbacks', 'view linkbacks', 'collection', 'no'), ('sendcomment', 'send comments', 'collection', 'no'), ('attachcommentfile', 'attach files to comments', 'collection', 'no'), ('attachsubmissionfile', 'upload files to drop box during submission', '', 'no'), ('cfgbibexport', 'configure BibExport', '', 'no'), ('runbibexport', 'run BibExport', '', 'no'), ('usebaskets', 'use baskets', '', 'no'), ('useloans', 'use loans', '', 'no'), ('usegroups', 'use groups', '', 'no'), ('usealerts', 'use alerts', '', 'no'), ('usemessages', 'use messages', '', 'no'), ('viewholdings', 'view holdings', 'collection', 'yes'), ('viewstatistics', 'view statistics', 'collection', 'yes'), ('runbibcirculation', 'run BibCirculation', '', 'no'), ('moderatecomments', 'moderate comments', 'collection', 'no'), ('moderatelinkbacks', 'moderate linkbacks', 'collection', 'no'), ('runbatchuploader', 'run batchuploader', 'collection', 'yes'), ('runbibtasklet', 'run BibTaskLet', '', 'no'), ('claimpaper_view_pid_universe', 'View the Claim Paper interface', '', 'no'), ('claimpaper_claim_own_papers', 'Clam papers to his own personID', '', 'no'), ('claimpaper_claim_others_papers', 'Claim papers for others', '', 'no'), ('claimpaper_change_own_data', 'Change data associated to his own person ID', '', 'no'), ('claimpaper_change_others_data', 'Change data of any person ID', '', 'no'), ('runbibtasklet', 'run BibTaskLet', '', 'no'), ('cfgbibsched', 'configure BibSched', '', 'no') ) # Default authorizations # role action arguments DEF_AUTHS = (('basketusers', 'usebaskets', {}), ('loanusers', 'useloans', {}), ('groupusers', 'usegroups', {}), ('alertusers', 'usealerts', {}), ('messageusers', 'usemessages', {}), ('holdingsusers', 'viewholdings', {}), ('statisticsusers', 'viewstatistics', {}), ('claimpaperusers', 'claimpaper_view_pid_universe', {}), 
('claimpaperoperators', 'claimpaper_view_pid_universe', {}), ('claimpaperusers', 'claimpaper_claim_own_papers', {}), ('claimpaperoperators', 'claimpaper_claim_own_papers', {}), ('claimpaperoperators', 'claimpaper_claim_others_papers', {}), ('claimpaperusers', 'claimpaper_change_own_data', {}), ('claimpaperoperators', 'claimpaper_change_own_data', {}), ('claimpaperoperators', 'claimpaper_change_others_data', {}), ) # Demo site authorizations # role action arguments DEF_DEMO_AUTHS = ( ('photocurator', 'runwebcoll', {'collection': 'Pictures'}), ('restrictedpicturesviewer', 'viewrestrdoc', {'status': 'restricted_picture'}), ('thesesviewer', VIEWRESTRCOLL, {'collection': 'Theses'}), ('referee_DEMOBOO_*', 'referee', {'doctype': 'DEMOBOO', 'categ': '*'}), ('curator', 'cfgbibknowledge', {}), ('curator', 'runbibedit', {}), ('curator', 'runbibeditmulti', {}), ('curator', 'runbibmerge', {}), ('swordcurator', 'runbibswordclient', {}), ('thesescurator', 'runbibedit', {'collection': 'Theses'}), ('thesescurator', VIEWRESTRCOLL, {'collection': 'Theses'}), ('photocurator', 'runbibedit', {'collection': 'Pictures'}), ('referee_DEMOBOO_*', 'runbibedit', {'collection': 'Books'}), ('submit_DEMOJRN_*', 'submit', {'doctype': 'DEMOJRN', 'act': 'SBI', 'categ': '*'}), ('submit_DEMOJRN_*', 'submit', {'doctype': 'DEMOJRN', 'act': 'MBI', 'categ': '*'}), ('submit_DEMOJRN_*', 'cfgwebjournal', {'name': 'AtlantisTimes', 'with_editor_rights': 'no'}), ('atlantiseditor', 'cfgwebjournal', {'name': 'AtlantisTimes', 'with_editor_rights': 'yes'}), ('referee_DEMOBOO_*', 'runbatchuploader', {'collection': 'Books'}), ('poetrycommentreader', 'viewcomment', {'collection': 'Poetry'}), ('atlantiseditor', VIEWRESTRCOLL, {'collection': 'Atlantis Times Drafts'}), ('anyuser', 'submit', {'doctype': 'DEMOART', 'act': 'SBI', 'categ': 'ARTICLE'}), ) _ = gettext_set_language(CFG_SITE_LANG) # Activities (i.e. actions) for which exists an administrative web interface. 
CFG_ACC_ACTIVITIES_URLS = { 'runbibedit' : (_("Run Record Editor"), "%s/%s/edit/?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), 'runbibeditmulti' : (_("Run Multi-Record Editor"), "%s/%s/multiedit/?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), 'runbibdocfile' : (_("Run Document File Manager"), "%s/submit/managedocfiles?ln=%%s" % CFG_SITE_URL), 'runbibmerge' : (_("Run Record Merger"), "%s/%s/merge/?ln=%%s" % (CFG_SITE_URL, CFG_SITE_RECORD)), 'runbibswordclient' : (_("Run BibSword client"), "%s/bibsword/?ln=%%s" % CFG_SITE_URL), 'cfgbibknowledge' : (_("Configure BibKnowledge"), "%s/kb?ln=%%s" % CFG_SITE_URL), 'cfgbibformat' : (_("Configure BibFormat"), "%s/admin/bibformat/bibformatadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgoaiharvest' : (_("Configure OAI Harvest"), "%s/admin/bibharvest/oaiharvestadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgoairepository' : (_("Configure OAI Repository"), "%s/admin/bibharvest/oairepositoryadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgbibindex' : (_("Configure BibIndex"), "%s/admin/bibindex/bibindexadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgbibrank' : (_("Configure BibRank"), "%s/admin/bibrank/bibrankadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebaccess' : (_("Configure WebAccess"), "%s/admin/webaccess/webaccessadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebcomment' : (_("Configure WebComment"), "%s/admin/webcomment/webcommentadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgweblinkback' : (_("Configure WebLinkback"), "%s/admin/weblinkback/weblinkbackadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebsearch' : (_("Configure WebSearch"), "%s/admin/websearch/websearchadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebsubmit' : (_("Configure WebSubmit"), "%s/admin/websubmit/websubmitadmin.py?ln=%%s" % CFG_SITE_URL), 'cfgwebjournal' : (_("Configure WebJournal"), "%s/admin/webjournal/webjournaladmin.py?ln=%%s" % CFG_SITE_URL), 'cfgbibsort' : (_("Configure BibSort"), "%s/admin/bibsort/bibsortadmin.py?ln=%%s" % CFG_SITE_URL), 'runbibcirculation' : (_("Run BibCirculation"), "%s/admin/bibcirculation/bibcirculationadmin.py?ln=%%s" % CFG_SITE_URL), 'runbatchuploader' : (_("Run Batch Uploader"), "%s/batchuploader/metadata?ln=%%s" % CFG_SITE_URL), 'claimpaper_claim_others_papers' : (_("Run Person/Author Manager"), "%s/person/search?ln=%%s" % CFG_SITE_URL) } CFG_WEBACCESS_MSGS = { 0: 'Try to login with another account.' % (CFG_SITE_SECURE_URL), 1: '
If you think this is not correct, please contact: %s' % (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_SUPPORT_EMAIL), 2: '
If you have any questions, please write to %s' % (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_SUPPORT_EMAIL), 3: 'Guest users are not allowed, please login.' % CFG_SITE_SECURE_URL, 4: 'The site is temporarily closed for maintenance. Please come back soon.', 5: 'Authorization failure', 6: '%s temporarily closed' % CFG_SITE_NAME, 7: 'This functionality is temporarily closed due to server maintenance. Please use only the search engine in the meantime.', 8: 'Functionality temporarily closed' } CFG_WEBACCESS_WARNING_MSGS = { 0: 'Authorization granted', 1: 'You are not authorized to perform this action.', 2: 'You are not authorized to perform any action.', 3: 'The action %s does not exist.', 4: 'Unexpected error occurred.', 5: 'Missing mandatory keyword argument(s) for this action.', 6: 'Guest accounts are not authorized to perform this action.', 7: 'Not enough arguments, user ID and action name required.', 8: 'Incorrect keyword argument(s) for this action.', 9: """Account '%s' is not yet activated.""", 10: """You were not authorized by the authentication method '%s'.""", 11: """The selected login method '%s' is not the default method for this account, please try another one.""", 12: """Selected login method '%s' does not exist.""", 13: """Could not register '%s' account.""", 14: """Could not login using '%s', because this user is unknown.""", 15: """Could not login using your '%s' account, because you have introduced a wrong password.""", 16: """External authentication troubles using '%s' (maybe temporary network problems).""", 17: """You have not yet confirmed the email address for the '%s' authentication method.""", 18: """The administrator has not yet activated your account for the '%s' authentication method.""", 19: """The site is having troubles in sending you an email for confirming your email address. The error has been logged and will be taken care of as soon as possible.""", 20: """No roles are authorized to perform action %s with the given parameters.""" } + +#There are three status key that must be here: OK, REMOVED and REVOKED +#the value doesn't matter at all +CFG_WEB_API_KEY_STATUS = { + 'OK':'OK', + 'REMOVED':'REMOVED', + 'REVOKED':'REVOKED', + 'WARNING':'WARNING' + } diff --git a/modules/webaccess/lib/external_authentication.py b/modules/webaccess/lib/external_authentication.py index 78d9bcd83..209348b5c 100644 --- a/modules/webaccess/lib/external_authentication.py +++ b/modules/webaccess/lib/external_authentication.py @@ -1,105 +1,106 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""External user authentication for Invenio.""" __revision__ = \ "$Id$" class InvenioWebAccessExternalAuthError(Exception): """Exception to signaling general external trouble.""" pass class ExternalAuth: """External authentication template example.""" def __init__(self, enforce_external_nicknames=False): """Initialize stuff here""" self.name = None # Set the following variable to True in order to import the externally # provided nickname into Invenio during the first login of a user # through this external authentication system. # If the nickname is already taken into Invenio, then it won't be # considered. self.enforce_external_nicknames = enforce_external_nicknames pass def auth_user(self, username, password, req=None): """Authenticate user-supplied USERNAME and PASSWORD. Return - None if authentication failed, or the email address of the - person if the authentication was successful. In order to do + (None, None) if authentication failed, or the (email, ext_id), where + email is the email address of the person and ext_id is the external + identifier, if the authentication was successful. In order to do this you may perhaps have to keep a translation table between usernames and email addresses. Raise InvenioWebAccessExternalAuthError in case of external troubles. """ raise NotImplementedError() - #return None + #return None, None def user_exists(self, email, req=None): """Check the external authentication system for existance of email. @return: True if the user exists, False otherwise """ raise NotImplementedError() def fetch_user_groups_membership(self, username, password=None, req=None): """Given a username and a password, returns a dictionary of groups and their description to which the user is subscribed. Raise InvenioWebAccessExternalAuthError in case of troubles. """ raise NotImplementedError() #return {} def fetch_user_nickname(self, username, password=None, req=None): """Given a username and a password, returns the right nickname belonging to that user (username could be an email). """ raise NotImplementedError() #return Nickname def fetch_user_preferences(self, username, password=None, req=None): """Given a username and a password, returns a dictionary of keys and values, corresponding to external infos and settings. userprefs = {"telephone": "2392489", "address": "10th Downing Street"} (WEBUSER WILL erase all prefs that starts by EXTERNAL_ and will store: "EXTERNAL_telephone"; all internal preferences can use whatever name but starting with EXTERNAL). If a pref begins with HIDDEN_ it will be ignored. """ raise NotImplementedError() #return {} def fetch_all_users_groups_membership(self, req=None): """Fetch all the groups with a description, and users who belong to each groups. @return: {'mygroup': ('description', ['email1', 'email2', ...]), ...} """ raise NotImplementedError() def robot_login_method_p(): """Return True if this method is dedicated to robots and should not therefore be available as a choice to regular users upon login. """ return False robot_login_method_p = staticmethod(robot_login_method_p) diff --git a/modules/webaccess/lib/external_authentication_cern.py b/modules/webaccess/lib/external_authentication_cern.py index 21c8af622..631aaad9d 100644 --- a/modules/webaccess/lib/external_authentication_cern.py +++ b/modules/webaccess/lib/external_authentication_cern.py @@ -1,195 +1,196 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """External user authentication for CERN NICE/CRA Invenio.""" __revision__ = \ "$Id$" import httplib import socket import re from invenio.errorlib import register_exception from invenio.external_authentication import ExternalAuth, \ InvenioWebAccessExternalAuthError from invenio.external_authentication_cern_wrapper import AuthCernWrapper # Tunable list of settings to be hidden CFG_EXTERNAL_AUTH_CERN_HIDDEN_SETTINGS = ['auth', 'respccid', 'ccid'] # Tunable list of groups to be hidden CFG_EXTERNAL_AUTH_HIDDEN_GROUPS = ( 'All Exchange People', 'CERN Users', 'cern-computing-postmasters@cern.ch', 'cern-nice2000-postmasters@cern.ch', 'CMF FrontEnd Users', 'CMF_NSC_259_NSU', 'Domain Users', 'GP Apply Favorites Redirection', 'GP Apply NoAdmin', 'info-terminalservices@cern.ch', 'info-terminalservices-members@cern.ch', 'IT Web IT', 'NICE Deny Enforce Password-protected Screensaver', 'NICE Enforce Password-protected Screensaver', 'NICE LightWeight Authentication WS Users', 'NICE MyDocuments Redirection (New)', 'NICE Profile Redirection', 'NICE Terminal Services Users', 'NICE Users', 'NICE VPN Users', ) CFG_EXTERNAL_AUTH_HIDDEN_GROUPS_RE = ( re.compile(r'Users by Letter [A-Z]'), re.compile(r'building-[\d]+'), re.compile(r'Users by Home CERNHOME[A-Z]'), ) class ExternalAuthCern(ExternalAuth): """ External authentication example for a custom HTTPS-based authentication service (called "CERN NICE"). """ def __init__(self): """Initialize stuff here""" ExternalAuth.__init__(self) try: self.connection = AuthCernWrapper() except (httplib.CannotSendRequest, socket.error, AttributeError, IOError, TypeError), msg: # Let the user note that # no connection is available register_exception(alert_admin=True) raise InvenioWebAccessExternalAuthError, msg def _try_twice(self, funct, params): """Try twice to execute funct on self.connection passing it params. If for various reason the connection doesn't work it's restarted """ try: ret = funct(self.connection, **params) except (httplib.CannotSendRequest, socket.error, AttributeError, IOError, TypeError): try: self.connection = AuthCernWrapper() ret = funct(self.connection, **params) except (httplib.CannotSendRequest, socket.error, AttributeError, IOError, TypeError): register_exception(alert_admin=True) self.connection = None raise InvenioWebAccessExternalAuthError return ret def auth_user(self, username, password, req=None): """ Check USERNAME and PASSWORD against CERN NICE/CRA database. - Return None if authentication failed, or the email address of the + Return (None, None) if authentication failed, or the + (email address, nickname) of the person if the authentication was successful. In order to do this you may perhaps have to keep a translation table between usernames and email addresses. If it is the first time the user logs in Invenio the nickname is stored alongside the email. 
        If this nickname is unfortunately already in use, it is discarded.
        Otherwise it is ignored.

        Raise InvenioWebAccessExternalAuthError in case of external troubles.
        """
        infos = self._try_twice(funct=AuthCernWrapper.get_user_info, \
                params={"user_name":username, "password":password})
        if "email" in infos:
-            return infos["email"]
+            return infos["email"], infos["login"]
        else:
-            return None
+            return None, None

    def user_exists(self, email, req=None):
        """Checks against CERN NICE/CRA for existence of email.
        @return: True if the user exists, False otherwise
        """
        users = self._try_twice(funct=AuthCernWrapper.list_users, \
                params={"display_name":email})
        return email.upper() in [user['email'].upper() for user in users]

    def fetch_user_groups_membership(self, email, password=None, req=None):
        """Fetch user groups membership from the CERN NICE/CRA account.
        @return: a dictionary of groupname, group description
        """
        groups = self._try_twice(funct=AuthCernWrapper.get_groups_for_user, \
                params={"user_name":email})
        # Filtering out uncomfortable groups
        groups = [group for group in groups
            if group not in CFG_EXTERNAL_AUTH_HIDDEN_GROUPS]
        for regexp in CFG_EXTERNAL_AUTH_HIDDEN_GROUPS_RE:
            for group in groups:
                if regexp.match(group):
                    groups.remove(group)
        # Produce a list of pairs: the group/mailing list name (with
        # '@cern.ch' stripped) and a description built from the name.
        return dict(map(lambda x: (x.find('@') > -1 and x[:x.find('@')] or x,
            '@' in x and x + ' (CERN Mailing list)' or x + ' (CERN Group)'),
            groups))

    def fetch_user_nickname(self, username, password, req=None):
        """Given a username and a password, returns the right nickname
        belonging to that user (username could be an email).
        """
        infos = self._try_twice(funct=AuthCernWrapper.get_user_info,
                params={"user_name":username, "password":password})
        if "login" in infos:
            return infos["login"]
        else:
            return None

    def fetch_user_preferences(self, username, password=None, req=None):
        """Fetch user preferences/settings from the CERN NICE account.
        The 'external' key will be '1' if the account is external to
        NICE/CRA, otherwise '0'.
        @return: a dictionary. Note: auth and respccid are hidden
        """
        prefs = self._try_twice(funct=AuthCernWrapper.get_user_info, \
                params={"user_name":username, "password":password})
        ret = {}
        try:
            if int(prefs['auth']) == 3 \
                    and (int(prefs['respccid']) > 0 \
                    or not prefs['email'].endswith('@cern.ch')):
                ret['external'] = '1'
            else:
                ret['external'] = '0'
        except KeyError:
            ret['external'] = '1'
        for key, value in prefs.items():
            if key in CFG_EXTERNAL_AUTH_CERN_HIDDEN_SETTINGS:
                ret['HIDDEN_' + key] = value
            else:
                ret[key] = value
        ## Hack to be forward-compatible with CERN SSO implementation
        if ret.has_key('company'):
            ret['homeinstitute'] = ret['company']
            del ret['company']
        if ret.has_key('name'):
            ret['fullname'] = ret['name']
            del ret['name']
        return ret

diff --git a/modules/webaccess/lib/external_authentication_ldap.py b/modules/webaccess/lib/external_authentication_ldap.py
index 0087b9058..2bcbc38ef 100644
--- a/modules/webaccess/lib/external_authentication_ldap.py
+++ b/modules/webaccess/lib/external_authentication_ldap.py
@@ -1,234 +1,234 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
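The hunk above shows the pattern this patch applies across all methods: every auth_user now returns a pair instead of a bare email. A minimal sketch of the new contract from a caller's point of view (login_via is a hypothetical name for illustration, not Invenio's actual login handler):

    def login_via(auth_method, username, password, req=None):
        # auth_user() now always returns a 2-tuple
        email, ext_id = auth_method.auth_user(username, password, req=req)
        if email is None:
            return None          # failure: the pair is (None, None)
        # 'email' identifies the account locally; 'ext_id' (the NICE login
        # here) lets Invenio recognise the person if the email later changes.
        return email, ext_id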
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""External user authentication for EPFL's LDAP instance.

This LDAP external authentication system relies on a collaborative LDAP
organized like this:

    o=EPFL, c=CH
    |
    +--ou=groups
    |  |
    |  +--- cn=xxx
    |       displayName= name of the group
    |       uniqueIdentifier= some local id for groups
    |
    +--ou=users
       |
       +---uid= some local id for users (ex: grfavre)
           uniqueIdentifier= another local id (ex: 128933)
           mail=xxx@xxx.xx
           memberOf= id of a group
           memberOf= id of another group

This example of LDAP authentication should help you develop your own
for your specific installation.
"""

__revision__ = \
    "$Id$"

import ldap

from invenio.external_authentication import ExternalAuth, \
        InvenioWebAccessExternalAuthError

CFG_EXTERNAL_AUTH_LDAP_SERVERS = ['ldap://scoldap.epfl.ch']
CFG_EXTERNAL_AUTH_LDAP_CONTEXT = "o=EPFL,c=CH"
CFG_EXTERNAL_AUTH_LDAP_USER_UID = ["uid", "uniqueIdentifier", "mail"]
CFG_EXTERNAL_AUTH_LDAP_MAIL_ENTRY = 'mail'
CFG_EXTERNAL_AUTH_LDAP_GROUP_MEMBERSHIP = 'memberOf'
CFG_EXTERNAL_AUTH_LDAP_GROUP_UID = 'uniqueIdentifier'
CFG_EXTERNAL_AUTH_LDAP_GROUP_NAME = 'displayName'
CFG_EXTERNAL_AUTH_LDAP_HIDDEN_GROUPS = ['EPFL-unit', 'users']

class ExternalAuthLDAP(ExternalAuth):
    """
    External authentication example for a custom LDAP-based
    authentication service.
    """

    def __init__(self):
        """Initialize stuff here"""
        ExternalAuth.__init__(self)
        self.enforce_external_nicknames = True

    def _ldap_try(self, command):
        """ Try to run the specified command on the first LDAP server that
        is not down."""
        for server in CFG_EXTERNAL_AUTH_LDAP_SERVERS:
            try:
                connection = ldap.initialize(server)
                return command(connection)
            except ldap.SERVER_DOWN, error_message:
                continue
        raise InvenioWebAccessExternalAuthError

    def auth_user(self, username, password, req=None):
        """
        Check USERNAME and PASSWORD against the LDAP system.
-        Return None if authentication failed, or the email address of the
+        Return (None, None) if authentication failed, or the
+        (email address, user_dn) of the
        person if the authentication was successful.
        Raise InvenioWebAccessExternalAuthError in case of external troubles.
        """
        if not password:
-            return None
+            return None, None
        query = '(|' + ''.join(['(%s=%s)' % (attrib, username)
                for attrib in CFG_EXTERNAL_AUTH_LDAP_USER_UID]) \
            + ')'
        def _check(connection):
            users = connection.search_s(CFG_EXTERNAL_AUTH_LDAP_CONTEXT,
                                        ldap.SCOPE_SUBTREE, query)
            # We pick the first result, as all the data we are interested
            # in should be the same in all the entries.
            if len(users):
                user_dn, user_info = users[0]
            else:
-                return None
+                return None, None
            try:
                connection.simple_bind_s(user_dn, password)
            except ldap.INVALID_CREDENTIALS:
                # It is enough to fail on one server to consider the
                # credentials to be invalid
-                return None
-            return user_info[CFG_EXTERNAL_AUTH_LDAP_MAIL_ENTRY][0]
+                return None, None
+            return user_info[CFG_EXTERNAL_AUTH_LDAP_MAIL_ENTRY][0], user_dn
        return self._ldap_try(_check)

    def user_exists(self, email, req=None):
        """Check the external authentication system for existence of email.
@return: True if the user exists, False otherwise """ query = '(%s=%s)' % (CFG_EXTERNAL_AUTH_LDAP_MAIL_ENTRY, email) def _check (connection): users = connection.search_s(CFG_EXTERNAL_AUTH_LDAP_CONTEXT, ldap.SCOPE_SUBTREE, query) return len(users) != 0 return self._ldap_try(_check) def fetch_user_nickname(self, username, password=None, req=None): """Given a username and a password, returns the right nickname belonging to that user (username could be an email). """ query = '(|' + ''.join (['(%s=%s)' % (attrib, username) for attrib in CFG_EXTERNAL_AUTH_LDAP_USER_UID]) \ + ')' def _get_nickname(connection): users = connection.search_s(CFG_EXTERNAL_AUTH_LDAP_CONTEXT, ldap.SCOPE_SUBTREE, query) # We pick the first result, as all the data we are interested # in should be the same in all the entries. if len(users): user_dn, user_info = users [0] else: return None emails = user_info[CFG_EXTERNAL_AUTH_LDAP_MAIL_ENTRY] if len(emails): email = emails[0] else: return False (left_part, right_part) = email.split('@') nickname = left_part.replace('.', ' ').title() if right_part != 'epfl.ch': nickname += ' - ' + right_part return nickname return self._ldap_try(_get_nickname) def fetch_user_groups_membership(self, username, password=None, req=None): """Given a username and a password, returns a dictionary of groups and their description to which the user is subscribed. Raise InvenioWebAccessExternalAuthError in case of troubles. """ query_person = '(|' + ''.join (['(%s=%s)' % (attrib, username) for attrib in CFG_EXTERNAL_AUTH_LDAP_USER_UID]) \ + ')' def _get_groups(connection): users = connection.search_s(CFG_EXTERNAL_AUTH_LDAP_CONTEXT, ldap.SCOPE_SUBTREE, query_person) if len(users): user_dn, user_info = users [0] else: return {} groups = {} group_ids = user_info[CFG_EXTERNAL_AUTH_LDAP_GROUP_MEMBERSHIP] for group_id in group_ids: query_group = '(%s=%s)' % (CFG_EXTERNAL_AUTH_LDAP_GROUP_UID, group_id) ldap_group = connection.search_s(CFG_EXTERNAL_AUTH_LDAP_CONTEXT, ldap.SCOPE_SUBTREE, query_group) if len(ldap_group): group_dn, group_infos = ldap_group[0] group_name = group_infos[CFG_EXTERNAL_AUTH_LDAP_GROUP_NAME][0] if group_name in CFG_EXTERNAL_AUTH_LDAP_HIDDEN_GROUPS: continue groups[group_id] = group_name return groups return self._ldap_try(_get_groups) def fetch_user_preferences(self, username, password=None, req=None): """Given a username and a password, returns a dictionary of keys and values, corresponding to external infos and settings. userprefs = {"telephone": "2392489", "address": "10th Downing Street"} (WEBUSER WILL erase all prefs that starts by EXTERNAL_ and will store: "EXTERNAL_telephone"; all internal preferences can use whatever name but starting with EXTERNAL). If a pref begins with HIDDEN_ it will be ignored. """ query = '(|' + ''.join (['(%s=%s)' % (attrib, username) for attrib in CFG_EXTERNAL_AUTH_LDAP_USER_UID]) \ + ')' def _get_personal_infos(connection): users = connection.search_s(CFG_EXTERNAL_AUTH_LDAP_CONTEXT, ldap.SCOPE_SUBTREE, query) if len(users): user_dn, user_info = users [0] return user_info else: return {} return self._ldap_try(_get_personal_infos) diff --git a/modules/webaccess/lib/external_authentication_robot.py b/modules/webaccess/lib/external_authentication_robot.py index 1fc4a5a51..bea3609a6 100644 --- a/modules/webaccess/lib/external_authentication_robot.py +++ b/modules/webaccess/lib/external_authentication_robot.py @@ -1,407 +1,413 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2010, 2011 CERN. 
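The LDAP _check closure above implements the classic search-then-bind pattern: search anonymously for the entry matching the username, then re-bind with the entry's DN and the supplied password to validate the credentials. A standalone sketch of the same pattern with python-ldap, assuming a reachable server and a uid/mail schema (the server URI and attribute names are examples, not part of this module):

    import ldap
    from ldap.filter import escape_filter_chars

    def check_password(server_uri, base, uid, password):
        """Search-then-bind credential check; returns (email, dn) or (None, None)."""
        conn = ldap.initialize(server_uri)
        # escape the user-supplied value to avoid LDAP filter injection
        results = conn.search_s(base, ldap.SCOPE_SUBTREE,
                                '(uid=%s)' % escape_filter_chars(uid))
        if not results:
            return None, None                      # unknown user
        user_dn, user_info = results[0]
        try:
            conn.simple_bind_s(user_dn, password)  # re-bind as the user
        except ldap.INVALID_CREDENTIALS:
            return None, None                      # wrong password
        return user_info['mail'][0], user_dn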
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""External user authentication for simple robots

This implements an external authentication system suitable for robot usage.
User attributes are retrieved directly from the form dictionary of the
request object.
"""

import os
import sys
import hmac
import time
import base64

if sys.hexversion < 0x2050000:
    import sha as sha1
else:
    from hashlib import sha1

from cPickle import dumps
from zlib import decompress, compress

from invenio.jsonutils import json, json_unicode_to_utf8
from invenio.shellutils import mymkdir
from invenio.external_authentication import ExternalAuth, InvenioWebAccessExternalAuthError
from invenio.config import CFG_ETCDIR, CFG_SITE_URL, CFG_SITE_SECURE_URL

CFG_ROBOT_EMAIL_ATTRIBUTE_NAME = 'email'
CFG_ROBOT_NICKNAME_ATTRIBUTE_NAME = 'nickname'
CFG_ROBOT_GROUPS_ATTRIBUTE_NAME = 'groups'
CFG_ROBOT_TIMEOUT_ATTRIBUTE_NAME = '__timeout__'
CFG_ROBOT_USERIP_ATTRIBUTE_NAME = '__userip__'
CFG_ROBOT_GROUPS_SEPARATOR = ';'
CFG_ROBOT_URL_TIMEOUT = 3600

CFG_ROBOT_KEYS_PATH = os.path.join(CFG_ETCDIR, 'webaccess', 'robot_keys.dat')

def normalize_ip(ip, up_to_bytes=4):
    """
    @param up_to_bytes: set this to the number of bytes that should be
        considered in the normalization. E.g. if this is set to 2, only the
        first two bytes will be considered, while the remaining two will be
        set to 0.
    @return: a normalized IP, e.g. 123.02.12.12 -> 123.2.12.12
    """
    try:
        ret = []
        for i, number in enumerate(ip.split(".")):
            if i < up_to_bytes:
                ret.append(str(int(number)))
            else:
                ret.append("0")
        return '.'.join(ret)
    except ValueError:
        ## e.g. if it's IPV6 ::1
        return ip

def load_robot_keys():
    """
    @return: the robot key dictionary.
    """
    from cPickle import loads
    from zlib import decompress
    try:
        robot_keys = loads(decompress(open(CFG_ROBOT_KEYS_PATH).read()))
        if not isinstance(robot_keys, dict):
            return {}
        else:
            return robot_keys
    except:
        return {}

class ExternalAuthRobot(ExternalAuth):
    """
    This class implements an external authentication method suitable for use
    by an external service that, after having authenticated a user, provides
    the user with a URL that, once followed, successfully logs the user into
    Invenio, with any details the external service decided to provide to the
    Invenio installation.

    Such a URL should be built as follows:
        BASE?QUERY
    where BASE is CFG_SITE_SECURE_URL/youraccount/robotlogin
    and QUERY is a urlencoded mapping of the following key->values:
      - assertion: an assertion, i.e. a piece of information describing the
        user, see below for more details.
      - robot: the identifier of the external service providing the assertion
      - login_method: the name of the login method as defined in
        CFG_EXTERNAL_AUTHENTICATION.
      - digest: the digest of the signature as detailed below.
      - referer: the URL where the user should be redirected after successful
        login (it is called referer as, for historical reasons, this is the
        original URL of the page on which a human user has clicked "login").

    The "assertion" should be a JSON serialized mapping with the following
    keys:
      - email: the email of the user (i.e. its identifier).
      - nickname: optional nickname of the user.
      - groups: an optional ';'-separated list of groups to which the user
        belongs.
      - __timeout__: the number of seconds (floating point) from the Epoch,
        after which the URL will no longer be valid (expressed in UTC).
      - __userip__: the IP address of the user for whom this URL has been
        created (if the user follows this URL from a different IP address,
        the request will not be valid).
      - any other key can be added and will be merged in the external user
        settings.

    If L{use_zlib} is True the assertion is a base64-url-flavour encoding
    of the zlib compression of the original assertion (useful for shortening
    the URL while keeping it easy to type).

    The "digest" is the hexadecimal representation of the digest using the
    HMAC-SHA1 method to sign the assertion with the secret key associated
    with the robot for the given login_method.

    @param enforce_external_nicknames: whether to trust nicknames provided by
        the external service and use them (if possible) as unique identifiers
        in the system.
    @type enforce_external_nicknames: boolean
    @param email_attribute_name: the actual key in the assertion that will
        contain the email.
    @type email_attribute_name: string
    @param nickname_attribute_name: the actual key in the assertion that will
        contain the nickname.
    @type nickname_attribute_name: string
    @param groups_attribute_name: the actual key in the assertion that will
        contain the groups.
    @type groups_attribute_name: string
    @param groups_separator: the string used to separate groups.
    @type groups_separator: string
    @param timeout_attribute_name: the actual key in the assertion that will
        contain the timeout.
    @type timeout_attribute_name: string
    @param userip_attribute_name: the actual key in the assertion that will
        contain the user IP.
    @type userip_attribute_name: string
+    @param external_id_attribute_name: the key in the assertion whose value
+        identifies the user in the external authentication system. By default
+        this is set to be the same as the nickname, but this can be
+        configured.
    @param check_user_ip: whether to check the IP address of the user
        following the given URL against the IP address stored in the
        assertion. If 0, no IP check will be performed; if 1, only the 1st
        byte will be compared; if 2, only the first two bytes; if 3, only the
        first three bytes; and if 4, the whole IP address will be checked.
    @type check_user_ip: int
    @param use_zlib: whether to use base64-url-flavour encoding of the zlib
        compression of the json serialization of the assertion or simply the
        json serialization of the assertion.
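Since check_user_ip feeds straight into normalize_ip defined above, a few worked calls make the prefix semantics concrete (keep the first up_to_bytes octets, zero the rest, and pass anything non-IPv4, e.g. IPv6, through untouched):

    normalize_ip('123.02.12.12')      # -> '123.2.12.12' (full check; leading zeros dropped)
    normalize_ip('123.02.12.12', 2)   # -> '123.2.0.0'   (check_user_ip=2: compare a /16-style prefix)
    normalize_ip('::1')               # -> '::1'         (ValueError path: returned as-is)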
    @type use_zlib: boolean
    """
    def __init__(self, enforce_external_nicknames=False,
            email_attribute_name=CFG_ROBOT_EMAIL_ATTRIBUTE_NAME,
            nickname_attribute_name=CFG_ROBOT_NICKNAME_ATTRIBUTE_NAME,
            groups_attribute_name=CFG_ROBOT_GROUPS_ATTRIBUTE_NAME,
            groups_separator=CFG_ROBOT_GROUPS_SEPARATOR,
            timeout_attribute_name=CFG_ROBOT_TIMEOUT_ATTRIBUTE_NAME,
            userip_attribute_name=CFG_ROBOT_USERIP_ATTRIBUTE_NAME,
            check_user_ip=4,
+            external_id_attribute_name=CFG_ROBOT_NICKNAME_ATTRIBUTE_NAME,
            use_zlib=True,
            ):
        ExternalAuth.__init__(self,
            enforce_external_nicknames=enforce_external_nicknames)
        self.email_attribute_name = email_attribute_name
        self.nickname_attribute_name = nickname_attribute_name
        self.groups_attribute_name = groups_attribute_name
        self.groups_separator = groups_separator
        self.timeout_attribute_name = timeout_attribute_name
        self.userip_attribute_name = userip_attribute_name
+        self.external_id_attribute_name = external_id_attribute_name
        self.check_user_ip = check_user_ip
        self.use_zlib = use_zlib

    def __extract_attribute(self, req):
        """
        Load the assertion from the request, extract all the attributes
        needed to properly log in the user, and verify that the data are
        both well formed and correctly signed.
        """
        from invenio.webinterface_handler import wash_urlargd
        args = wash_urlargd(req.form, {
            'assertion': (str, ''),
            'robot': (str, ''),
            'digest': (str, ''),
            'login_method': (str, '')})
        assertion = args['assertion']
        digest = args['digest']
        robot = args['robot']
        login_method = args['login_method']
        shared_key = load_robot_keys().get(login_method, {}).get(robot)
        if shared_key is None:
            raise InvenioWebAccessExternalAuthError("A key does not exist for robot: %s, login_method: %s" % (robot, login_method))
        if not self.verify(shared_key, assertion, digest):
            raise InvenioWebAccessExternalAuthError("The provided assertion does not validate against the digest %s for robot %s" % (repr(digest), repr(robot)))
        if self.use_zlib:
            try:
                ## Workaround for the Perl implementation that does not add
                ## any padding to the base64 encoding.
                needed_pad = (4 - len(assertion) % 4) % 4
                assertion += needed_pad * '='
                assertion = decompress(base64.urlsafe_b64decode(assertion))
            except:
                raise InvenioWebAccessExternalAuthError("The provided assertion is corrupted")
        data = json_unicode_to_utf8(json.loads(assertion))
        if not isinstance(data, dict):
            raise InvenioWebAccessExternalAuthError("The provided assertion is invalid")
        timeout = data[self.timeout_attribute_name]
        if timeout < time.time():
            raise InvenioWebAccessExternalAuthError("The provided assertion is expired")
        userip = data.get(self.userip_attribute_name)
        if not self.check_user_ip or (normalize_ip(userip, self.check_user_ip) == normalize_ip(req.remote_ip, self.check_user_ip)):
            return data
        else:
            raise InvenioWebAccessExternalAuthError("The provided assertion has been issued for a different IP address (%s instead of %s)" % (userip, req.remote_ip))

    def auth_user(self, username, password, req=None):
        """Authenticate user-supplied USERNAME and PASSWORD.
        Return (None, None) if authentication failed, or the
        (email address, external id) of the person if the authentication
        was successful. In order to do this you may perhaps have to keep
        a translation table between usernames and email addresses.
        Raise InvenioWebAccessExternalAuthError in case of external troubles.
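For illustration, a hedged sketch of what a Python-based external service could do to produce a URL that __extract_attribute above will accept, mirroring its checks in reverse (the secret, robot name, method name and site URL are examples):

    import base64, hmac, json, time, urllib, zlib
    from hashlib import sha1

    SECRET = 's3cr3t'                       # shared key stored via update_robot_key()
    assertion = json.dumps({
        'email': 'jane.doe@example.org',
        'nickname': 'jdoe',
        'groups': 'staff;editors',          # joined with CFG_ROBOT_GROUPS_SEPARATOR
        '__timeout__': time.time() + 3600,  # seconds from the Epoch, UTC
        '__userip__': '127.0.0.1',
    })
    # use_zlib=True flavour: zlib-compress, then URL-safe base64
    assertion = base64.urlsafe_b64encode(zlib.compress(assertion))
    digest = hmac.new(SECRET, assertion, sha1).hexdigest()
    query = urllib.urlencode({'assertion': assertion, 'robot': 'my-robot',
                              'login_method': 'Robot', 'digest': digest,
                              'referer': 'https://your.site.com'})
    url = 'https://your.site.com/youraccount/robotlogin?' + query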
""" data = self.__extract_attribute(req) email = data.get(self.email_attribute_name) + ext_id = data.get(self.external_id_attribute_name, email) if email: if isinstance(email, str): - return email.strip().lower() + return email.strip().lower(), ext_id.strip() else: raise InvenioWebAccessExternalAuthError("The email provided in the assertion is invalid: %s" % (repr(email))) else: - return None + return None, None def fetch_user_groups_membership(self, username, password=None, req=None): """Given a username and a password, returns a dictionary of groups and their description to which the user is subscribed. Raise InvenioWebAccessExternalAuthError in case of troubles. """ if self.groups_attribute_name: data = self.__extract_attribute(req) groups = data.get(self.groups_attribute_name) if groups: if isinstance(groups, str): groups = [group.strip() for group in groups.split(self.groups_separator)] return dict(zip(groups, groups)) else: raise InvenioWebAccessExternalAuthError("The groups provided in the assertion are invalid: %s" % (repr(groups))) return {} def fetch_user_nickname(self, username, password=None, req=None): """Given a username and a password, returns the right nickname belonging to that user (username could be an email). """ if self.nickname_attribute_name: data = self.__extract_attribute(req) nickname = data.get(self.nickname_attribute_name) if nickname: if isinstance(nickname, str): return nickname.strip().lower() else: raise InvenioWebAccessExternalAuthError("The nickname provided in the assertion is invalid: %s" % (repr(nickname))) return None def fetch_user_preferences(self, username, password=None, req=None): """Given a username and a password, returns a dictionary of keys and values, corresponding to external infos and settings. userprefs = {"telephone": "2392489", "address": "10th Downing Street"} (WEBUSER WILL erase all prefs that starts by EXTERNAL_ and will store: "EXTERNAL_telephone"; all internal preferences can use whatever name but starting with EXTERNAL). If a pref begins with HIDDEN_ it will be ignored. """ data = self.__extract_attribute(req) for key in (self.email_attribute_name, self.groups_attribute_name, self.nickname_attribute_name, self.timeout_attribute_name, self.userip_attribute_name): if key and key in data: del data[key] return data def robot_login_method_p(): """Return True if this method is dedicated to robots and should not therefore be available as a choice to regular users upon login. """ return True robot_login_method_p = staticmethod(robot_login_method_p) def sign(secret, assertion): """ @return: a signature of the given assertion. @rtype: string @note: override this method if you want to change the signature algorithm (e.g. to use GPG). @see: L{verify} """ return hmac.new(secret, assertion, sha1).hexdigest() sign = staticmethod(sign) def verify(secret, assertion, signature): """ @return: True if the signature is valid @rtype: boolean @note: override this method if you want to change the signature algorithm (e.g. to use GPG) @see: L{sign} """ return hmac.new(secret, assertion, sha1).hexdigest() == signature verify = staticmethod(verify) def test_create_example_url(self, email, login_method, robot, ip, assertion=None, timeout=None, referer=None, groups=None, nickname=None): """ Create a test URL to test the robot login. @param email: email of the user we want to login as. @type email: string @param login_method: the login_method name as specified in CFG_EXTERNAL_AUTHENTICATION. @type login_method: string @param robot: the identifier of this robot. 
@type robot: string @param assertion: any further data we want to send to. @type: json serializable mapping @param ip: the IP of the user. @type: string @param timeout: timeout when the URL will expire (in seconds from the Epoch) @type timeout: float @param referer: the URL where to land after successful login. @type referer: string @param groups: the list of optional group of the user. @type groups: list of string @param nickname: the optional nickname of the user. @type nickname: string @return: the URL to login as the user. @rtype: string """ from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION from invenio.urlutils import create_url if assertion is None: assertion = {} assertion[self.email_attribute_name] = email if nickname: assertion[self.nickname_attribute_name] = nickname if groups: assertion[self.groups_attribute_name] = self.groups_separator.join(groups) if timeout is None: timeout = time.time() + CFG_ROBOT_URL_TIMEOUT assertion[self.timeout_attribute_name] = timeout if referer is None: referer = CFG_SITE_URL if login_method is None: for a_login_method, details in CFG_EXTERNAL_AUTHENTICATION.iteritems(): if details[2]: login_method = a_login_method break robot_keys = load_robot_keys() assertion[self.userip_attribute_name] = ip assertion = json.dumps(assertion) if self.use_zlib: assertion = base64.urlsafe_b64encode(compress(assertion)) shared_key = robot_keys[login_method][robot] digest = self.sign(shared_key, assertion) return create_url("%s%s" % (CFG_SITE_SECURE_URL, "/youraccount/robotlogin"), { 'assertion': assertion, 'robot': robot, 'login_method': login_method, 'digest': digest, 'referer': referer}) def update_robot_key(login_method, robot, key=None): """ Utility to update the robot key store. @param login_method: the login_method name as per L{CFG_EXTERNAL_AUTHENTICATION}. It should correspond to a robot-enable login method. @type: string @param robot: the robot identifier @type robot: string @param key: the secret @type key: string @note: if the secret is empty the corresponding key will be removed. """ robot_keys = load_robot_keys() if key is None and login_method in robot_keys and robot in robot_keys[login_method]: del robot_keys[login_method][robot] if not robot_keys[login_method]: del robot_keys[login_method] else: if login_method not in robot_keys: robot_keys[login_method] = {} robot_keys[login_method][robot] = key mymkdir(os.path.join(CFG_ETCDIR, 'webaccess')) open(CFG_ROBOT_KEYS_PATH, 'w').write(compress(dumps(robot_keys, -1))) diff --git a/modules/webaccess/lib/external_authentication_sso.py b/modules/webaccess/lib/external_authentication_sso.py index 942da99f9..493019cd0 100644 --- a/modules/webaccess/lib/external_authentication_sso.py +++ b/modules/webaccess/lib/external_authentication_sso.py @@ -1,201 +1,202 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
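Taken together, update_robot_key and test_create_example_url give administrators a complete round trip from a Python shell. A hedged sketch, assuming CFG_EXTERNAL_AUTHENTICATION contains a robot-enabled entry named 'Robot' (the method name, robot name and secret are examples):

    from invenio.external_authentication_robot import update_robot_key, load_robot_keys
    from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION

    update_robot_key('Robot', 'my-robot', 's3cr3t')          # store (or rotate) the secret
    assert load_robot_keys()['Robot']['my-robot'] == 's3cr3t'

    method = CFG_EXTERNAL_AUTHENTICATION['Robot']            # an ExternalAuthRobot instance
    url = method.test_create_example_url('jane.doe@example.org',
        login_method='Robot', robot='my-robot', ip='127.0.0.1')

    update_robot_key('Robot', 'my-robot')                    # key=None removes the robot again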
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""External user authentication for CERN SSO (Shibboleth) Invenio."""

__revision__ = \
    "$Id$"

import re

from invenio.external_authentication import ExternalAuth

# Tunable list of settings to be hidden
CFG_EXTERNAL_AUTH_HIDDEN_SETTINGS = ('auth', 'respccid', 'personid')
# Tunable list of groups to be hidden
CFG_EXTERNAL_AUTH_HIDDEN_GROUPS = (
    'All Exchange People',
    'CERN Users',
    'cern-computing-postmasters',
    'cern-nice2000-postmasters',
    'CMF FrontEnd Users',
    'CMF_NSC_259_NSU',
    'Domain Users',
    'GP Apply Favorites Redirection',
    'GP Apply NoAdmin',
    'info-terminalservices',
    'info-terminalservices-members',
    'IT Web IT',
    'NICE Deny Enforce Password-protected Screensaver',
    'NICE Enforce Password-protected Screensaver',
    'NICE LightWeight Authentication WS Users',
    'NICE MyDocuments Redirection (New)',
    'NICE Profile Redirection',
    'NICE Terminal Services Users',
    'NICE Users',
    'NICE VPN Users',
)
CFG_EXTERNAL_AUTH_HIDDEN_GROUPS_RE = (
    re.compile(r'Users by Letter [A-Z]'),
    re.compile(r'building-[\d]+'),
    re.compile(r'Users by Home CERNHOME[A-Z]'),
)

# Prefix name for Shibboleth variables
CFG_EXTERNAL_AUTH_SSO_PREFIX_NAME = 'ADFS_'
# Name of the variable containing the groups
CFG_EXTERNAL_AUTH_SSO_GROUP_VARIABLE = CFG_EXTERNAL_AUTH_SSO_PREFIX_NAME+'GROUP'
# Name of the variable containing the login name
CFG_EXTERNAL_AUTH_SSO_LOGIN_VARIABLE = CFG_EXTERNAL_AUTH_SSO_PREFIX_NAME+'LOGIN'
# Name of the variable containing the email
CFG_EXTERNAL_AUTH_SSO_EMAIL_VARIABLE = CFG_EXTERNAL_AUTH_SSO_PREFIX_NAME+'EMAIL'
# Separator character for the group variable
CFG_EXTERNAL_AUTH_SSO_GROUPS_SEPARATOR = ';'

class ExternalAuthSSO(ExternalAuth):
    """
    External authentication example for a custom SSO-based
    authentication service.
    """

    def __init__(self, enforce_external_nicknames=False):
        """Initialize stuff here"""
        ExternalAuth.__init__(self, enforce_external_nicknames)
        self.egroup_cache = None

    def in_shibboleth(self, req):
        """ Return True if the current request handler is actually under
        Shibboleth control. """
        return req.subprocess_env.has_key(CFG_EXTERNAL_AUTH_SSO_EMAIL_VARIABLE)

    def auth_user(self, username, password, req=None):
        """
        Check USERNAME and PASSWORD against the SSO system.
-        Return None if authentication failed, or the email address of the
+        Return (None, None) if authentication failed, or the
+        (email address, nickname) of the
        person if the authentication was successful. In order to do this
        you may perhaps have to keep a translation table between usernames
        and email addresses.
        If it is the first time the user logs into Invenio the nickname is
        stored alongside the email. If this nickname is unfortunately already
        in use it is discarded. Otherwise it is ignored.
        Raise InvenioWebAccessExternalAuthError in case of external troubles.
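Under Shibboleth, all the user data arrives in the request environment rather than in the username/password arguments. A minimal sketch of the environment this class reads, using the default ADFS_ variable names (the values are examples; FakeReq is a stand-in for a mod_python request object):

    class FakeReq(object):
        def __init__(self, env):
            self.subprocess_env = env
        def add_common_vars(self):
            pass

    req = FakeReq({'ADFS_EMAIL': 'jane.doe@cern.ch',
                   'ADFS_LOGIN': 'jdoe',
                   'ADFS_GROUP': 'my-project-members;CERN Users'})
    # ExternalAuthSSO().auth_user(None, None, req) -> ('jane.doe@cern.ch', 'jdoe')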
        Note: for SSO the parameters are discarded and overridden by
        Shibboleth variables.
        """
        if req:
            req.add_common_vars()
            if req.subprocess_env.has_key(CFG_EXTERNAL_AUTH_SSO_EMAIL_VARIABLE):
-                return req.subprocess_env[CFG_EXTERNAL_AUTH_SSO_EMAIL_VARIABLE]
-        return None
+                return req.subprocess_env[CFG_EXTERNAL_AUTH_SSO_EMAIL_VARIABLE], req.subprocess_env[CFG_EXTERNAL_AUTH_SSO_LOGIN_VARIABLE]
+        return None, None

    #def user_exists(self, email, req=None):
        #"""Checks against CERN NICE/CRA for existence of email.
        #@return: True if the user exists, False otherwise
        #"""
        #users = self._try_twice(funct=AuthCernWrapper.list_users, \
            #params={"display_name":email})
        #return email.upper() in [user['email'].upper() for user in users]

    def fetch_user_groups_membership(self, email, password=None, req=None):
        """Fetch user groups membership from the SSO system.
        @return: a dictionary of groupname, group description

        Note: for SSO the parameters are discarded and overridden by
        Shibboleth variables.
        """
        return self._fetch_egroups(req)

    def fetch_user_nickname(self, username, password=None, req=None):
        """Given a username and a password, returns the right nickname
        belonging to that user (username could be an email).

        Note: for SSO the parameters are discarded and overridden by
        Shibboleth variables.
        """
        if req:
            req.add_common_vars()
            if req.subprocess_env.has_key(CFG_EXTERNAL_AUTH_SSO_LOGIN_VARIABLE):
                return req.subprocess_env[CFG_EXTERNAL_AUTH_SSO_LOGIN_VARIABLE]
            else:
                return None

    def _fetch_egroups(self, req=None):
        if False: #self.egroup_cache is not None:
            return self.egroup_cache
        elif req:
            req.add_common_vars()
            if req.subprocess_env.has_key(CFG_EXTERNAL_AUTH_SSO_GROUP_VARIABLE):
                groups = req.subprocess_env[CFG_EXTERNAL_AUTH_SSO_GROUP_VARIABLE].split(CFG_EXTERNAL_AUTH_SSO_GROUPS_SEPARATOR)
                # Filtering out uncomfortable groups
                groups = [group for group in groups
                    if group not in CFG_EXTERNAL_AUTH_HIDDEN_GROUPS]
                for regexp in CFG_EXTERNAL_AUTH_HIDDEN_GROUPS_RE:
                    for group in groups:
                        if regexp.match(group):
                            groups.remove(group)
                self.egroup_cache = dict(map(lambda x: (x,
                    '@' in x and x + ' (Mailing list)' or x + ' (Group)'),
                    groups))
                return self.egroup_cache
        return {}

    def _fetch_particular_preferences(self, req=None):
        """This hidden method is there to be overridden in order to fetch
        some particular values from non-standard variables.
        """
        if req:
            ret = {}
            req.add_common_vars()
            if req.subprocess_env.has_key('HTTP_SHIB_AUTHENTICATION_METHOD'):
                ret['authmethod'] = req.subprocess_env['HTTP_SHIB_AUTHENTICATION_METHOD']
            egroups = self._fetch_egroups(req)
            ret['external'] = 'CERN External Users' in egroups and '1' or '0'
            return ret
        return {}

    def fetch_user_preferences(self, username, password=None, req=None):
        """Fetch user preferences/settings from the SSO account.
        The 'external' key will be '1' if the account is external to SSO,
        otherwise '0'.
        @return: a dictionary.
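The e-group filtering is the same two-stage sieve used in the NICE/CRA module: an exact-match blacklist, then a set of regular expressions. A condensed equivalent (note that a comprehension like this also sidesteps the remove-while-iterating pitfall of the groups.remove(group) loops above, which can skip adjacent matches):

    import re
    HIDDEN = ('CERN Users', 'NICE Users')                    # abridged blacklist
    HIDDEN_RE = (re.compile(r'building-[\d]+'),)             # abridged patterns

    raw = 'CERN Users;my-project-members;building-40;project-editors@cern.ch'
    groups = [g for g in raw.split(';')
              if g not in HIDDEN
              and not any(r.match(g) for r in HIDDEN_RE)]
    # groups == ['my-project-members', 'project-editors@cern.ch']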
Note: for SSO the parameter are discarded and overloaded by Shibboleth variables """ if req: req.add_common_vars() ret = {} prefs = self._fetch_particular_preferences(req) for key, value in req.subprocess_env.items(): if key.startswith(CFG_EXTERNAL_AUTH_SSO_PREFIX_NAME) and not key == CFG_EXTERNAL_AUTH_SSO_GROUP_VARIABLE: prefs[key[len(CFG_EXTERNAL_AUTH_SSO_PREFIX_NAME):].lower()] = value for key, value in prefs.items(): if key in CFG_EXTERNAL_AUTH_HIDDEN_SETTINGS: ret['HIDDEN_' + key] = value else: ret[key] = value return ret return {} diff --git a/modules/webaccess/lib/webaccess_regression_tests.py b/modules/webaccess/lib/webaccess_regression_tests.py index 3a9b24bbd..d96292cf4 100644 --- a/modules/webaccess/lib/webaccess_regression_tests.py +++ b/modules/webaccess/lib/webaccess_regression_tests.py @@ -1,241 +1,305 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """WebAccess Regression Test Suite.""" __revision__ = "$Id$" import unittest import socket import time import cgi from urlparse import urlparse, urlunparse from urllib import urlopen, urlencode from invenio.access_control_admin import acc_add_role, acc_delete_role, \ acc_get_role_definition from invenio.access_control_firerole import compile_role_definition, \ serialize, deserialize from invenio.config import CFG_SITE_URL, CFG_SITE_SECURE_URL, CFG_DEVEL_SITE from invenio.testutils import make_test_suite, run_test_suite, \ test_web_page_content, merge_error_messages +from invenio.dbquery import run_sql class WebAccessWebPagesAvailabilityTest(unittest.TestCase): """Check WebAccess web pages whether they are up or not.""" def test_webaccess_admin_interface_availability(self): """webaccess - availability of WebAccess Admin interface pages""" baseurl = CFG_SITE_URL + '/admin/webaccess/webaccessadmin.py/' _exports = ['', 'delegate_startarea', 'manageaccounts'] error_messages = [] for url in [baseurl + page for page in _exports]: # first try as guest: error_messages.extend(test_web_page_content(url, username='guest', expected_text= 'Authorization failure')) # then try as admin: error_messages.extend(test_web_page_content(url, username='admin')) if error_messages: self.fail(merge_error_messages(error_messages)) return def test_webaccess_admin_guide_availability(self): """webaccess - availability of WebAccess Admin guide pages""" url = CFG_SITE_URL + '/help/admin/webaccess-admin-guide' error_messages = test_web_page_content(url, expected_text="WebAccess Admin Guide") if error_messages: self.fail(merge_error_messages(error_messages)) return class WebAccessFireRoleTest(unittest.TestCase): """Check WebAccess behaviour WRT FireRole.""" def setUp(self): """Create a fake role.""" self.role_name = 'test' self.role_description = 'test role' self.role_definition = 'allow email 
/.*@cern.ch/' self.role_id, dummy, dummy, dummy = acc_add_role(self.role_name, self.role_description, serialize(compile_role_definition(self.role_definition)), self.role_definition) def tearDown(self): """Drop the fake role.""" acc_delete_role(self.role_id) def test_webaccess_firerole_serialization(self): """webaccess - firerole role definition correctly serialized""" def_ser = compile_role_definition(self.role_definition) tmp_def_ser = acc_get_role_definition(self.role_id) self.assertEqual(def_ser, deserialize(tmp_def_ser)) class WebAccessUseBasketsTest(unittest.TestCase): """ Check WebAccess behaviour WRT enabling/disabling web modules such as baskets. """ def test_precached_area_authorization(self): """webaccess - login-time precached authorizations for usebaskets""" error_messages = test_web_page_content(CFG_SITE_SECURE_URL + '/youraccount/display?ln=en', username='jekyll', password='j123ekyll', expected_text='Your Baskets') error_messages.extend(test_web_page_content(CFG_SITE_SECURE_URL + '/youraccount/display?ln=en', username='hyde', password='h123yde', unexpected_text='Your Baskets')) if error_messages: self.fail(merge_error_messages(error_messages)) if CFG_DEVEL_SITE: class WebAccessRobotLoginTest(unittest.TestCase): """ Check whether robot login functionality is OK. """ def _erase_example_user_and_groups(self): - from invenio.dbquery import run_sql - uid = run_sql("SELECT id FROM user WHERE email=%s", (self.a_email, )) - if uid: - run_sql("DELETE FROM user WHERE id=%s", (uid[0][0], )) - run_sql("DELETE FROM user_usergroup WHERE id_user=%s", (uid[0][0], )) - for method_name in self.robot_login_methods: - for group in self.some_groups: - run_sql("DELETE FROM usergroup WHERE name=%s", (group + " [" + method_name + "]",)) + for email in (self.a_email, self.another_email): + uid = run_sql("SELECT id FROM user WHERE email=%s", (email, )) + if uid: + run_sql("DELETE FROM user WHERE id=%s", (uid[0][0], )) + run_sql("DELETE FROM user_usergroup WHERE id_user=%s", (uid[0][0], )) + run_sql("DELETE FROM userEXT WHERE id_user=%s", (uid[0][0], )) + for method_name in self.robot_login_methods: + for group in self.some_groups: + run_sql("DELETE FROM usergroup WHERE name=%s", ("%s [%s]" % (group, method_name), )) + for nickname in (self.a_nickname, self.another_nickname): + run_sql("DELETE FROM userEXT WHERE id=%s", (nickname, )) def setUp(self): from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION self.robot_login_methods = dict([(method_name, CFG_EXTERNAL_AUTHENTICATION[method_name]) for method_name in CFG_EXTERNAL_AUTHENTICATION if CFG_EXTERNAL_AUTHENTICATION[method_name] and CFG_EXTERNAL_AUTHENTICATION[method_name].robot_login_method_p()]) self.a_robot = "regression-test" self.a_password = "123" self.a_email = "foo.bar@example.org" + self.another_email = "baz@example.org" self.a_nickname = "foo-bar" + self.another_nickname = "baz" self.some_groups = ["a group for regression test", "another group for regression test"] self.myip = urlopen(CFG_SITE_URL + "/httptest/whatismyip").read() from invenio.external_authentication_robot import update_robot_key for method_name in self.robot_login_methods: update_robot_key(method_name, self.a_robot, self.a_password) from invenio.external_authentication_robot import load_robot_keys def tearDown(self): from invenio.external_authentication_robot import update_robot_key #for method_name in self.robot_login_methods: #update_robot_key(method_name, self.a_robot) from invenio.external_authentication_robot import load_robot_keys 
self._erase_example_user_and_groups() def test_normal_robot_login_method(self): """webaccess - robot login method""" for method_name, method in self.robot_login_methods.iteritems(): url = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip) try: error_messages = test_web_page_content(url, expected_text=self.a_email) if error_messages: self.fail(merge_error_messages(error_messages)) finally: self._erase_example_user_and_groups() def test_robot_login_method_with_nickname(self): """webaccess - robot login method with nickname""" for method_name, method in self.robot_login_methods.iteritems(): if method.enforce_external_nicknames: url = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip, nickname=self.a_nickname) try: error_messages = test_web_page_content(url, expected_text=self.a_nickname) if error_messages: self.fail(merge_error_messages(error_messages)) finally: self._erase_example_user_and_groups() def test_robot_login_method_with_groups(self): """webaccess - robot login method with groups""" for method_name, method in self.robot_login_methods.iteritems(): url = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip, groups=self.some_groups, referer=CFG_SITE_SECURE_URL + "/yourgroups/display") try: for group in self.some_groups: error_messages = test_web_page_content(url, expected_text="%s [%s]" % (group, method_name)) if error_messages: self.fail(merge_error_messages(error_messages)) finally: self._erase_example_user_and_groups() def test_robot_login_method_wrong_ip(self): """webaccess - robot login method wrong IP""" for method_name, method in self.robot_login_methods.iteritems(): url = method.test_create_example_url(self.a_email, method_name, self.a_robot, '123.123.123.123') try: error_messages = test_web_page_content(url, expected_text="The provided assertion has been issued for a different IP address") if error_messages: self.fail(merge_error_messages(error_messages)) finally: self._erase_example_user_and_groups() def test_robot_login_method_expired_assertion(self): """webaccess - robot login method with expired assertion""" for method_name, method in self.robot_login_methods.iteritems(): url = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip, timeout=time.time()) time.sleep(1) try: error_messages = test_web_page_content(url, expected_text="The provided assertion is expired") if error_messages: self.fail(merge_error_messages(error_messages)) finally: self._erase_example_user_and_groups() def test_robot_login_method_with_invalid_signature(self): """webaccess - robot login method with invalid signature""" for method_name, method in self.robot_login_methods.iteritems(): url = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip) url = list(urlparse(url)) query = cgi.parse_qs(url[4]) for key, value in query.items(): query[key] = value[0] digest = query['digest'] digest0 = digest[0] if digest0 == '0': digest0 = '1' else: digest0 = '0' digest = digest0 + digest[1:] query['digest'] = digest url[4] = urlencode(query) url = urlunparse(url) try: error_messages = test_web_page_content(url, expected_text="does not validate against the digest") if error_messages: self.fail(merge_error_messages(error_messages)) finally: self._erase_example_user_and_groups() + def test_robot_login_method_changed_email(self): + """webaccess - robot login method changed email""" + for method_name, method in self.robot_login_methods.iteritems(): + url = 
method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip, nickname=self.a_nickname)
+                url2 = method.test_create_example_url(self.another_email, method_name, self.a_robot, self.myip, nickname=self.a_nickname)
+                try:
+                    error_messages = test_web_page_content(url, expected_text=self.a_nickname)
+                    if error_messages:
+                        self.fail(merge_error_messages(error_messages))
+                    id_user = run_sql("SELECT id FROM user WHERE email=%s", (self.a_email, ))[0][0]
+                    self.failUnless(run_sql("SELECT * FROM userEXT WHERE id=%s AND id_user=%s AND method=%s", (self.a_nickname, id_user, method_name)), "Can't find id %s for user %s with method %s. userEXT contains: %s" % (self.a_nickname, id_user, method_name, run_sql("SELECT * FROM userEXT")))
+                    error_messages = test_web_page_content(url2, expected_text=self.a_nickname)
+                    if error_messages:
+                        self.fail(merge_error_messages(error_messages))
+                    id_user2 = run_sql("SELECT id FROM user WHERE email=%s", (self.another_email, ))[0][0]
+                    self.assertEqual(id_user, id_user2)
+                    self.failUnless(run_sql("SELECT * FROM userEXT WHERE id=%s AND id_user=%s AND method=%s", (self.a_nickname, id_user2, method_name)))
+                    ## The old email should not exist any longer.
+                    self.failIf(run_sql("SELECT * FROM user WHERE email=%s", (self.a_email, )))
+                finally:
+                    self._erase_example_user_and_groups()
+
+        def test_robot_login_method_merging_accounts(self):
+            """webaccess - robot login method merging accounts"""
+            for method_name, method in self.robot_login_methods.iteritems():
+                url = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip, nickname=self.a_nickname)
+                url2 = method.test_create_example_url(self.another_email, method_name, self.a_robot, self.myip, nickname=self.another_nickname)
+                url3 = method.test_create_example_url(self.a_email, method_name, self.a_robot, self.myip, nickname=self.another_nickname)
+                try:
+                    error_messages = test_web_page_content(url, expected_text=self.a_nickname)
+                    if error_messages:
+                        self.fail(merge_error_messages(error_messages))
+                    id_user = run_sql("SELECT id FROM user WHERE email=%s", (self.a_email, ))[0][0]
+                    self.failUnless(run_sql("SELECT * FROM userEXT WHERE id=%s AND id_user=%s AND method=%s", (self.a_nickname, id_user, method_name)))
+                    error_messages = test_web_page_content(url2, expected_text=self.another_nickname)
+                    if error_messages:
+                        self.fail(merge_error_messages(error_messages))
+                    id_user2 = run_sql("SELECT id FROM user WHERE email=%s", (self.another_email, ))[0][0]
+                    self.failIfEqual(id_user, id_user2)
+                    self.failUnless(run_sql("SELECT * FROM userEXT WHERE id=%s AND id_user=%s AND method=%s", (self.another_nickname, id_user2, method_name)), "Can't find id %s for user %s with method %s. userEXT contains: %s" % (self.another_nickname, id_user2, method_name, run_sql("SELECT * FROM userEXT")))
+                    ## The first email should still exist.
+                    self.failUnless(run_sql("SELECT * FROM user WHERE email=%s", (self.a_email, )))
+                    ## We log in with the 1st email but with the 2nd nickname.
+                    ## That means the 1st user should be merged into the second.
+                    ## However we still check for the 1st nickname, as in Invenio,
+                    ## once a nickname is assigned it will never change!
+                    error_messages = test_web_page_content(url3, expected_text=self.a_nickname)
+                    if error_messages:
+                        self.fail(merge_error_messages(error_messages))
+                    ## another_email should no longer exist.
+                    self.failIf(run_sql("SELECT * FROM user WHERE email=%s", (self.another_email, )), "%s still exists, while it should have been merged into %s: %s, userEXT contains: %s" % (self.another_email, self.a_email, run_sql("SELECT * FROM user WHERE email=%s", (self.another_email, )), run_sql("SELECT * FROM userEXT")))
+                    ## And the corresponding user should not exist anymore as it
+                    ## has been merged into id_user.
+                    self.failIf(run_sql("SELECT * FROM user WHERE id=%s", (id_user2, )))
+                    self.failUnless(run_sql("SELECT * FROM user WHERE id=%s AND email=%s", (id_user, self.a_email)))
+                finally:
+                    self._erase_example_user_and_groups()
+
    TEST_SUITE = make_test_suite(WebAccessWebPagesAvailabilityTest,
                                 WebAccessFireRoleTest,
                                 WebAccessUseBasketsTest,
                                 WebAccessRobotLoginTest)
else:
    TEST_SUITE = make_test_suite(WebAccessWebPagesAvailabilityTest,
                                 WebAccessFireRoleTest,
                                 WebAccessUseBasketsTest)

if __name__ == "__main__":
    run_test_suite(TEST_SUITE, warn_user=True)

diff --git a/modules/webaccess/lib/webaccessadmin_lib.py b/modules/webaccess/lib/webaccessadmin_lib.py
index 239434ce1..033f370f6 100644
--- a/modules/webaccess/lib/webaccessadmin_lib.py
+++ b/modules/webaccess/lib/webaccessadmin_lib.py
@@ -1,3838 +1,3884 @@
## This file is part of Invenio.
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
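The two tests just added pin down the identity rules behind the new userEXT mapping: a returning external id keeps its local account even if the email changes, while a known email arriving under a new external id gets merged. A hedged, self-contained model of the rule the tests assert (real Invenio keeps this in the userEXT table, not in dictionaries):

    users = {}     # uid -> email
    ext_map = {}   # (ext_id, method) -> uid

    def match_local_account(ext_id, method, email):
        uid = ext_map.get((ext_id, method))
        if uid is not None:
            users[uid] = email                      # same external id: follow email changes
            return uid
        for known_uid, known_email in users.items():
            if known_email == email:                # known email, new external id:
                ext_map[(ext_id, method)] = known_uid   # bind/merge onto that account
                return known_uid
        uid = len(users) + 1                        # otherwise create a fresh account
        users[uid] = email
        ext_map[(ext_id, method)] = uid
        return uid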
"""Invenio WebAccess Administrator Interface.""" __revision__ = "$Id$" __lastupdated__ = """$Date$""" ## fill config variables: import re import random import getopt import sys import time from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_ACCESS_CONTROL_LEVEL_GUESTS, \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN, \ CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS, \ CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION, \ CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION, \ CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_ADMIN_EMAIL, \ CFG_SITE_SECURE_URL import invenio.access_control_engine as acce import invenio.access_control_admin as acca from invenio.mailutils import send_email from invenio.errorlib import register_exception from invenio.bibrankadminlib import addadminbox, tupletotable, \ tupletotable_onlyselected, addcheckboxes, createhiddenform from invenio.access_control_firerole import compile_role_definition, \ repair_role_definitions, serialize from invenio.messages import gettext_set_language from invenio.dbquery import run_sql, OperationalError, wash_table_column_name from invenio.webpage import page from invenio.webuser import getUid, isGuestUser, page_not_authorized, collect_user_info from invenio.webuser import email_valid_p, get_user_preferences, \ set_user_preferences, update_Uid from invenio.urlutils import redirect_to_url, wash_url_argument from invenio.access_control_config import DEF_DEMO_USER_ROLES, \ DEF_DEMO_ROLES, DEF_DEMO_AUTHS, WEBACCESSACTION, MAXPAGEUSERS, \ SUPERADMINROLE, CFG_EXTERNAL_AUTHENTICATION, DELEGATEADDUSERROLE, \ CFG_ACC_EMPTY_ROLE_DEFINITION_SRC, InvenioWebAccessFireroleError, \ - MAXSELECTUSERS, CFG_EXTERNAL_AUTH_DEFAULT + MAXSELECTUSERS, CFG_EXTERNAL_AUTH_DEFAULT, CFG_WEB_API_KEY_STATUS from invenio.bibtask import authenticate from cgi import escape def index(req, title='', body='', subtitle='', adminarea=2, authorized=0, ln=CFG_SITE_LANG): """main function to show pages for webaccessadmin. 1. if user not logged in and administrator, show the mustlogin page 2. if used without body argument, show the startpage 3. show admin page with title, body, subtitle and navtrail. 
authorized - if 1, don't check if the user is allowed to be webadmin """ navtrail_previous_links = 'Admin Area' \ '' % (CFG_SITE_SECURE_URL,) if body: if adminarea == 1: navtrail_previous_links += '> ' \ 'Delegate Rights ' % (CFG_SITE_SECURE_URL, ) if adminarea >= 2 and adminarea < 7: navtrail_previous_links += '> ' \ '' \ 'WebAccess Admin ' % (CFG_SITE_SECURE_URL, ) if adminarea == 3: navtrail_previous_links += '> ' \ 'Role Administration ' % (CFG_SITE_SECURE_URL, ) elif adminarea == 4: navtrail_previous_links += '> ' \ 'Action Administration ' % (CFG_SITE_SECURE_URL, ) elif adminarea == 5: navtrail_previous_links += '> ' \ 'User Administration ' % (CFG_SITE_SECURE_URL, ) elif adminarea == 6: navtrail_previous_links += '> ' \ 'Reset Authorizations ' % (CFG_SITE_SECURE_URL, ) elif adminarea == 7: navtrail_previous_links += '> ' \ 'Manage Accounts ' % (CFG_SITE_SECURE_URL, ) elif adminarea == 8: navtrail_previous_links += '> ' \ 'List Groups ' % (CFG_SITE_SECURE_URL, ) elif adminarea == 9: navtrail_previous_links += '> ' \ 'Manage Robot Login ' % (CFG_SITE_SECURE_URL, ) id_user = getUid(req) (auth_code, auth_message) = is_adminuser(req) if not authorized and auth_code != 0: return mustloginpage(req, auth_message) elif not body: title = 'WebAccess Admin' body = startpage() elif type(body) != str: body = addadminbox(subtitle, datalist=body) return page(title=title, uid=id_user, req=req, body=body, navtrail=navtrail_previous_links, lastupdated=__lastupdated__) def mustloginpage(req, message): """show a page asking the user to login.""" navtrail_previous_links = '' \ 'Admin Area > ' \ 'WebAccess Admin ' % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL) return page_not_authorized(req=req, text=message, navtrail=navtrail_previous_links) def is_adminuser(req): """check if user is a registered administrator. """ return acce.acc_authorize_action(req, WEBACCESSACTION) def perform_managerobotlogin(req, robot_name='', new_pwd1='', new_pwd2='', login_method='', timeout='', referer='', ip='', action='', confirm=0, email='', groups='', nickname='', json_assertion='', url_only=0): robot_name = wash_url_argument(robot_name, 'str') new_pwd1 = wash_url_argument(new_pwd1, 'str') new_pwd2 = wash_url_argument(new_pwd2, 'str') login_method = wash_url_argument(login_method, 'str') timeout = wash_url_argument(timeout, 'int') referer = wash_url_argument(referer, 'str') ip = wash_url_argument(ip, 'str') action = wash_url_argument(action, 'str') confirm = wash_url_argument(confirm, 'int') email = wash_url_argument(email, 'str') groups = wash_url_argument(groups, 'str') nickname = wash_url_argument(nickname, 'str') url_only = wash_url_argument(url_only, 'int') json_assertion = wash_url_argument(json_assertion, 'str') from invenio.external_authentication_robot import update_robot_key, load_robot_keys, json (auth_code, auth_message) = acce.acc_authorize_action(req, 'cfgrobotkeys', login_method='*', robot='*') if auth_code != 0: return mustloginpage(req, auth_message) available_robot_login_methods = [name for (name, method) in CFG_EXTERNAL_AUTHENTICATION.iteritems() if method and method.robot_login_method_p()] errors = [] warnings = [] messages = [] if not available_robot_login_methods: errors.append(""" You should enable at least on robot based login method in access_control_config.py in the variable CFG_EXTERNAL_AUTHENTICATION. 
""") forms = "" else: robot_keys = load_robot_keys() if not login_method: login_method = available_robot_login_methods[0] if not timeout: timeout = 60 * 60 if not ip: ip = req.remote_ip user_info = collect_user_info(req) if not email: email = user_info['email'] if not nickname: nickname = user_info['nickname'] if not robot_name: if login_method in robot_keys and robot_keys[login_method]: robot_name = robot_keys[login_method].keys()[0] if not referer: referer = CFG_SITE_SECURE_URL if action == 'changepwd': if acce.acc_authorize_action(user_info, 'cfgrobotkeys', login_method=login_method, robot=robot_name)[0]: errors.append("""You don't have proper authorization to modify robot %s for login_method %s.""" % (escape(robot_name), escape(login_method))) if login_method not in available_robot_login_methods: errors.append("""The login method must be one among the available_robot_login_methods (%s).""" % escape(', '.join(available_robot_login_methods))) if new_pwd1 != new_pwd2: errors.append("""The two passwords are not equal.""") new_pwd1 = '' new_pwd2 = '' if not robot_name: errors.append("""The robot name must be specified.""") if int(confirm) == 1: if not errors: update_robot_key(login_method, robot_name, new_pwd1) robot_keys = load_robot_keys() if new_pwd1: messages.append("""The password for robot %s has been successfully updated.""" % escape(robot_name)) else: messages.append("""The password for robot %s has been erased, and hence the robot %s does not exist anymore.""" % (escape(robot_name), escape(robot_name))) action = '' confirm = 0 robot_name = '' new_pwd1 = '' new_pwd2 = '' else: if not new_pwd1: warnings.append("""By setting an empty password you will actually erase the robot %s""" % escape(robot_name)) elif action == 'createurl': if acce.acc_authorize_action(user_info, 'cfgrobotkeys', login_method=login_method, robot=robot_name)[0]: errors.append("""You don't have proper authorization to create a URL for robot %s for login_method %s.""" % (escape(robot_name), escape(login_method))) if login_method not in available_robot_login_methods: errors.append("""The login method must be one among the available_robot_login_methods (%s).""" % escape(', '.join(available_robot_login_methods))) if robot_name not in robot_keys.get(login_method, {}): errors.append("""The robot name does not correspond to a valid robot name (for %s these are: %s).""" % (escape(login_method), escape(', '.join(robot_keys.get(login_method, {}).keys())))) if json_assertion.strip(): try: assertion = json.loads(json_assertion) assert(isinstance(assertion, dict)) except Exception, err: errors.append("""The assertion is not a valid json serializable mapping: %s""" % (err)) else: assertion = None if not email: errors.append("""The email is mandatory.""") if not ip: errors.append("""The IP address is mandatory.""") if not errors: url = CFG_EXTERNAL_AUTHENTICATION[login_method].test_create_example_url(email, login_method=login_method, robot=robot_name, ip=ip, timeout=time.time() + timeout, referer=referer, groups=groups.splitlines(), nickname=nickname, assertion=assertion) if url_only: req.content_type = 'text/plain' return url messages.append("""The corresponding URL is: %(url)s""" % { 'url_escape': escape(url, True), 'url': escape(url) }) action = '' forms = """

Existing login_method:

    %s

""" % ''.join(["
  • %s (robots: %s)
  • " % (CFG_SITE_SECURE_URL, method, method, ', '.join(robot_keys.get(method, {}))) for method in available_robot_login_methods]) forms += """

    Existing robot names (for login_method %s):

      %s

    """ % (escape(login_method), ''.join(["
  • %s
  • " % name for name in robot_keys.get(login_method, {})])) confirm_field = """""" if action == 'changepwd': confirm_field = """ Please confirm once more you want to change this password.""" login_method_boxes = "" for login_method_name in available_robot_login_methods: if login_method_name == login_method: login_method_boxes += """
    """ % {'name': escape(login_method_name, True)} else: login_method_boxes += """
    """ % {'name': escape(login_method_name, True)} forms += """ """ % { 'login_method': escape(login_method, True), 'robot_name': escape(robot_name, True), 'new_pwd1': escape(new_pwd1, True), 'new_pwd2': escape(new_pwd2, True), 'confirm_field': confirm_field, 'login_method_boxes': login_method_boxes, } forms += """
    Login method:%(login_method_boxes)s
    %(confirm_field)s
    Login method: %(login_method_boxes)s
    """ % { 'login_method_boxes': login_method_boxes, 'robot_name': escape(robot_name, True), 'timeout': escape(str(timeout), True), 'referer': escape(referer, True), 'ip': escape(ip, True), 'email': escape(email, True), 'nickname': escape(nickname, True), 'groups': escape(groups), 'json_assertion': escape(json_assertion) } out = "" if errors: out += "
    ERRORS
      " for error in errors: out += '
    • %s
    • ' % error out += "
    " if warnings: out += "
    WARNINGS
      " for warning in warnings: out += '
    • %s
    • ' % warning out += "
    " if messages: out += "
    INFORMATION
      " for message in messages: out += '
    • %s
    • ' % message out += "
    " out += forms return index(req=req, title='Manage Robot Login', subtitle='Are to manage robot-based authentiation', body=out, adminarea=2) def perform_listgroups(req): """List all the existing groups.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) header = ['name'] groups = run_sql('select name from usergroup') output = tupletotable(header, groups, highlight_rows_p=True, alternate_row_colors_p=True) extra = """
    Create new role
    go here to add a new role.
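# Note on page assembly: the perform_* handlers in this module typically build
# an HTML fragment (tupletotable() for tabular data, createhiddenform() for
# confirmation steps) and pass it to index(), which adds the WebAccess
# navigation trail and the administrator authorization check.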
    """ return index(req=req, title='Group list', subtitle='All the groups registered in the system', body=[output, extra], adminarea=2) def perform_rolearea(req, grep=""): """create the role area menu page.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) header = ['id', 'name', 'description', 'firewall like role definition', 'users', 'authorizations / actions', 'role', ''] roles = acca.acc_get_all_roles() roles2 = [] if grep: try: re_grep = re.compile(grep) except Exception, err: re_grep = None grep = '' else: re_grep = None for (id, name, desc, dummy, firerole_def_src) in roles: if not firerole_def_src: firerole_def_src = '' ## Workaround for None. if re_grep and not re_grep.search(name) and not re_grep.search(desc) and not re_grep.search(firerole_def_src): ## We're grepping for some word. ## Let's dig into the authorization then. all_actions = acca.acc_find_possible_actions_all(id) ## FIXME: the acc_find_possible_actions_all is really an ugly ## function, but is the closest to what it's needed in order ## to retrieve all the authorization of a role. for idx, row in enumerate(all_actions): grepped = False if idx % 2 == 0: ## even lines contains headers like in: ## ['role', 'action', '#', 'collection'] ## the only useful text to grep is from index 3 onwards for keyword in row[3:]: if re_grep.search(keyword): grepped = True break if grepped: break else: ## odd lines contains content like in: ## [1, 18L, 1, 'Theses'] ## the useful text to grep is indirectly index 1 ## which is indeed the id_action (needed to retrieve the ## action name) and from column 3 onwards. if re_grep.search(acca.acc_get_action_name(row[1])): break for value in row[3:]: if re_grep.search(value): grepped = True break if grepped: break else: ## We haven't grepped anything! ## Let's skip to the next role then... continue if len(desc) > 30: desc = desc[:30] + '...' if firerole_def_src and len(firerole_def_src) > 30: firerole_def_src = firerole_def_src[:30] + '...' roles2.append([id, name, desc, firerole_def_src]) for col in [(('add', 'adduserrole'), ('delete', 'deleteuserrole'),), (('add', 'addauthorization'), ('modify', 'modifyauthorizations'), ('remove', 'deleteroleaction')), (('modify', 'modifyrole'), ('delete', 'deleterole')), (('show details', 'showroledetails'), )]: roles2[-1].append('%s' % (col[0][1], id, col[0][0])) for (str, function) in col[1:]: roles2[-1][-1] += ' / %s' % \ (function, id, str) output = """
    Users:
add or remove users' access to a role and its privileges.
    Authorizations/Actions:
these terms mean almost the same thing, but an authorization is a
    connection between a role and an action (possibly) containing arguments.
    Roles:
    see all the information attached to a role and decide if you want to
    delete it.
    Show only roles having any detail matching the regular expression:
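# The grep filter compiles the submitted pattern and keeps a role only if it
# matches the role's name, description, firewall-like definition or, failing
# that, the keywords of its authorizations. A hypothetical example:
#   perform_rolearea(req, grep='photo')
# would list only roles whose details mention "photo" somewhere.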
    """ % escape(grep) output += tupletotable(header=header, tuple=roles2, highlight_rows_p=True, alternate_row_colors_p=True) extra = """
    Create new role
    go here to add a new role.
    """ return index(req=req, title='Role Administration', subtitle='administration with roles as access point', body=[output, extra], adminarea=2) def perform_actionarea(req, grep=''): """create the action area menu page.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) if grep: try: re_grep = re.compile(grep) except Exception, err: re_grep = None grep = '' else: re_grep = None header = ['name', 'authorizations/roles', ''] actions = acca.acc_get_all_actions() actions2 = [] roles2 = [] for (id, name, description) in actions: if re_grep and not re_grep.search(name) and not re_grep.search(description): grepped = False roles = acca.acc_get_action_roles(id) for id_role, role_name, role_description in roles: if re_grep.search(role_name) or re_grep.search(role_description): grepped = True break elif re_grep.search(acca.acc_get_role_details(id_role)[3] or ''): ## Found in FireRole grepped = True break else: details = acca.acc_find_possible_actions(id_role, id) if details: for argument in details[0][1:]: if re_grep.search(argument): grepped = True break for values in details[1:]: for value in values[1:]: if re_grep.search(value): grepped = True break if grepped: break if grepped: break if not grepped: continue actions2.append([name, description]) for col in [(('add', 'addauthorization'), ('modify', 'modifyauthorizations'), ('remove', 'deleteroleaction')), (('show details', 'showactiondetails'), )]: actions2[-1].append('%s' '' % (col[0][1], id, col[0][0])) for (str, function) in col[1:]: actions2[-1][-1] += ' / %s' % (function, id, str) output = """
    Authorizations/Roles:
these terms mean almost the same thing, but an authorization is a
    connection between a role and an action (possibly) containing arguments.
    Actions:
    see all the information attached to an action.
    Show only actions having any detail matching the regular expression:
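# Same grep idea as the role area, but starting from actions: the pattern is
# tried against each action's roles, their firewall-like definitions and the
# argument values returned by acc_find_possible_actions(); an invalid regular
# expression is silently discarded and the full list is shown.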
    """ % escape(grep) output += tupletotable(header=header, tuple=actions2, highlight_rows_p=True, alternate_row_colors_p=True) extra = """
    Create new role
    go here to add a new role.
    """ return index(req=req, title='Action Administration', subtitle='administration with actions as access point', body=[output, extra], adminarea=2) def perform_userarea(req, email_user_pattern=''): """create area to show info about users. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = 'step 1 - search for users' output = """

    search for users to display.

    """ # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) text = ' 1. search for user\n' text += ' \n' % (email_user_pattern, ) output += createhiddenform(action="userarea", text=text, button="search for users") if email_user_pattern: try: users1 = run_sql("""SELECT id, email FROM user WHERE email<>'' AND email RLIKE %s ORDER BY email LIMIT %s""", (email_user_pattern, MAXPAGEUSERS+1)) except OperationalError: users1 = () if not users1: output += '

    no matching users

    ' else: subtitle = 'step 2 - select what to do with user' users = [] for (id, email) in users1[:MAXPAGEUSERS]: users.append([id, email]) for col in [(('add', 'addroleuser'), ('remove', 'deleteuserrole')), (('show details', 'showuserdetails'), )]: users[-1].append('%s' % (col[0][1], email_user_pattern, id, col[0][0])) for (str, function) in col[1:]: users[-1][-1] += ' / %s' % \ (function, email_user_pattern, id, str) output += '

    found %s matching users:

    ' % \ (len(users1), ) output += tupletotable(header=['id', 'email', 'roles', ''], tuple=users, highlight_rows_p=True, alternate_row_colors_p=True) if len(users1) > MAXPAGEUSERS: output += '

    only showing the first %s users, ' \ 'narrow your search...

    ' % (MAXPAGEUSERS, ) return index(req=req, title='User Administration', subtitle=subtitle, body=[output], adminarea=2) def perform_resetarea(req): """create the reset area menu page.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) output = """
    Reset to Default Authorizations
remove all changes that have been made to the roles and
    add only the default authorization settings.
    Add Default Authorizations
    keep all changes and add the default authorization settings.
    """ return index(req=req, title='Reset Authorizations', subtitle='reseting to or adding default authorizations', body=[output], adminarea=2) def perform_resetdefaultsettings(req, superusers=[], confirm=0): """delete all roles, actions and authorizations presently in the database and add only the default roles. only selected users will be added to superadmin, rest is blank """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) # cleaning input if type(superusers) == str: superusers = [superusers] # remove not valid e-mails for email in superusers: if not check_email(email): superusers.remove(email) # instructions output = """

    before you reset the settings, we need some users
    to connect to %s.
enter as many e-mail addresses as you want and press reset.
    confirm reset settings when you have added enough e-mails.
    %s is added as default.
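# What confirming actually does (a sketch, using the accessadmin API as below,
# with a made-up address):
#   acca.acc_reset_default_settings(['admin@example.org'])
# drops every role, action and authorization and recreates only the defaults,
# attaching the listed e-mails (CFG_SITE_ADMIN_EMAIL is always included) to
# SUPERADMINROLE.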

    """ % (SUPERADMINROLE, CFG_SITE_ADMIN_EMAIL) # add more superusers output += """

    enter user e-mail addresses:

    """ for email in superusers: output += ' ' % (email, ) output += """ e-mail
    """ if superusers: # remove emails output += """
    have you entered wrong data?
    """ # superusers confirm table start = '
    ' extra = ' ' for email in superusers: extra += '' % (email, ) extra += ' ' end = '
    ' output += '

    reset default settings with the users below?

    ' output += tupletotable(header=['e-mail address'], tuple=superusers, start=start, extracolumn=extra, end=end, highlight_rows_p=True, alternate_row_colors_p=True) if confirm in [1, "1"]: res = acca.acc_reset_default_settings(superusers) if res: output += '

    successfully reset default settings

    ' else: output += '

    sorry, could not reset default settings

' return index(req=req, title='Reset Default Settings', subtitle='reset settings', body=[output], adminarea=6) def perform_adddefaultsettings(req, superusers=[], confirm=0): """add the default settings, and keep everything else. probably nothing will be deleted, except if changes have been made to the defaults.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) # cleaning input if type(superusers) == str: superusers = [superusers] # remove invalid e-mails (rebuild the list instead of removing while iterating) superusers = [email for email in superusers if check_email(email)] # instructions output = """

    before you add the settings, we need some users
    to connect to %s.
enter as many e-mail addresses as you want and press add.
    confirm add settings when you have added enough e-mails.
    %s is added as default.
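# Unlike the reset variant above, confirming here calls
# acca.acc_add_default_settings(superusers), which leaves existing roles and
# authorizations untouched and only (re)adds the missing defaults.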

    """ % (SUPERADMINROLE, CFG_SITE_ADMIN_EMAIL) # add more superusers output += """

    enter user e-mail addresses:

    """ for email in superusers: output += ' ' % (email, ) output += """ e-mail
    """ if superusers: # remove emails output += """
    have you entered wrong data?
    """ # superusers confirm table start = '
    ' extra = ' ' for email in superusers: extra += '' % (email, ) extra += ' ' end = '
    ' output += '

    add default settings with the users below?

    ' output += tupletotable(header=['e-mail address'], tuple=superusers, start=start, extracolumn=extra, end=end) if confirm in [1, "1"]: res = acca.acc_add_default_settings(superusers) if res: output += '

    successfully added default settings

    ' else: output += '

    sorry, could not add default settings

    ' return index(req=req, title='Add Default Settings', subtitle='add settings', body=[output], adminarea=6) def perform_manageaccounts(req, mtype='', content='', confirm=0): """start area for managing accounts.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = 'Overview' fin_output = '' fin_output += """
    Menu
    0. Show all 1. Access policy 2. Account overview 3. Create account 4. Edit accounts
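# Dispatch pattern: each numbered section is its own perform_* function; when
# such a function is called with callback='yes' it hands its rendered box back
# to perform_manageaccounts via mtype/content, so the page can show a single
# section or, for mtype == "perform_showall", all of them stacked.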
    """ % (CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL, CFG_SITE_SECURE_URL) if mtype == "perform_accesspolicy" and content: fin_output += content elif mtype == "perform_accesspolicy" or mtype == "perform_showall": fin_output += perform_accesspolicy(req, callback='') fin_output += "
    " if mtype == "perform_accountoverview" and content: fin_output += content elif mtype == "perform_accountoverview" or mtype == "perform_showall": fin_output += perform_accountoverview(req, callback='') fin_output += "
    " if mtype == "perform_createaccount" and content: fin_output += content elif mtype == "perform_createaccount" or mtype == "perform_showall": fin_output += perform_createaccount(req, callback='') fin_output += "
    " if mtype == "perform_modifyaccounts" and content: fin_output += content elif mtype == "perform_modifyaccounts" or mtype == "perform_showall": fin_output += perform_modifyaccounts(req, callback='') fin_output += "
    " if mtype == "perform_becomeuser" and content: fin_output += content elif mtype == "perform_becomeuser" or mtype == "perform_showall": fin_output += perform_becomeuser(req, callback='') fin_output += "
    " return index(req=req, title='Manage Accounts', subtitle=subtitle, body=[fin_output], adminarea=0, authorized=1) def perform_accesspolicy(req, callback='yes', confirm=0): """Modify default behaviour of a guest user or if new accounts should automatically/manually be modified.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """1. Access policy.   [?]""" % CFG_SITE_SECURE_URL account_policy = {} account_policy[0] = "Users can register new accounts. New accounts automatically activated." account_policy[1] = "Users can register new accounts. Admin users must activate the accounts." account_policy[2] = "Only admin can register new accounts. User cannot edit email address." account_policy[3] = "Only admin can register new accounts. User cannot edit email address or password." account_policy[4] = "Only admin can register new accounts. User cannot edit email address, password or login method." account_policy[5] = "Only admin can register new accounts. User cannot edit email address, password or login method and information about how to get an account is hidden from the login page." site_policy = {} site_policy[0] = "Normal operation of the site." site_policy[1] = "Read-only site, all write operations temporarily closed." site_policy[2] = "Site fully closed." site_policy[3] = "Site fully closed. Database connection disabled." output = "(Modifications must be done in access_control_config.py)
    " output += "
    Current settings:
    " output += "Site status: %s
    " % (site_policy[CFG_ACCESS_CONTROL_LEVEL_SITE]) output += "Guest accounts allowed: %s
    " % (CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0 and "Yes" or "No") output += "Account policy: %s
    " % (account_policy[CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS]) output += "Allowed email addresses limited: %s
    " % (CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN and CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN or "Not limited") output += "Send email to admin when new account: %s
    " % (CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS == 1 and "Yes" or "No") output += "Send email to user after creating new account: %s
    " % (CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT == 1 and "Yes" or "No") output += "Send email to user when account is activated: %s
    " % (CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION == 1 and "Yes" or "No") output += "Send email to user when account is deleted/rejected: %s
    " % (CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION == 1 and "Yes" or "No") output += "
    " output += "Available 'login via' methods:
    " methods = CFG_EXTERNAL_AUTHENTICATION.keys() methods.sort() for system in methods: output += """%s %s
    """ % (system, (CFG_EXTERNAL_AUTH_DEFAULT == system and "(Default)" or "")) output += "
    Changing the settings:
    " output += "Currently, all changes must be done using your favourite editor, and the webserver restarted for changes to take effect. For the settings to change, either look in the guide or in access_control_config.py ." body = [output] if callback: return perform_manageaccounts(req, "perform_accesspolicy", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_accountoverview(req, callback='yes', confirm=0): """Modify default behaviour of a guest user or if new accounts should automatically/manually be modified.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """2. Account overview.   [?]""" % CFG_SITE_SECURE_URL output = "" res = run_sql("SELECT COUNT(*) FROM user WHERE email=''") output += "Guest accounts: %s
    " % res[0][0] res = run_sql("SELECT COUNT(*) FROM user WHERE email!=''") output += "Registered accounts: %s
    " % res[0][0] res = run_sql("SELECT COUNT(*) FROM user WHERE email!='' AND note='0' OR note IS NULL") output += "Inactive accounts: %s " % res[0][0] if res[0][0] > 0: output += ' [Activate/Reject accounts]' res = run_sql("SELECT COUNT(*) FROM user") output += "
Total number of accounts: %s
    " % res[0][0] body = [output] if callback: return perform_manageaccounts(req, "perform_accountoverview", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_createaccount(req, email='', password='', callback='yes', confirm=0): """Modify default behaviour of a guest user or if new accounts should automatically/manually be modified.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """3. Create account.   [?]""" % CFG_SITE_SECURE_URL output = "" text = ' Email:\n' text += '
    ' % (email, ) text += ' Password:\n' text += '
    ' % (password, ) output += createhiddenform(action="createaccount", text=text, confirm=1, button="Create") if confirm in [1, "1"] and email and email_valid_p(email): res = run_sql("SELECT email FROM user WHERE email=%s", (email,)) if not res: res = run_sql("INSERT INTO user (email,password, note) values(%s,AES_ENCRYPT(email,%s), '1')", (email, password)) if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT == 1: emailsent = send_new_user_account_warning(email, email, password) == 0 if password: output += 'Account created with password and activated.' else: output += 'Account created without password and activated.' if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT == 1: if emailsent: output += '
    An email has been sent to the owner of the account.' else: output += '
Could not send an email to the owner of the account.' else: output += 'An account with the same email already exists.' elif confirm in [1, "1"]: output += 'Please specify a valid email address.' body = [output] if callback: return perform_manageaccounts(req, "perform_createaccount", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_modifyaccountstatus(req, userID, email_user_pattern, limit_to, maxpage, page, callback='yes', confirm=0): """set a disabled account to enabled and vice versa""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) res = run_sql("SELECT id, email, note FROM user WHERE id=%s", (userID, )) subtitle = "" output = "" if res: if res[0][2] in [0, "0", None]: res2 = run_sql("UPDATE user SET note=1 WHERE id=%s", (userID, )) output += """The account '%s' has been activated.""" % res[0][1] if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_ACTIVATION == 1: emailsent = send_account_activated_message(res[0][1], res[0][1], '*****') if emailsent: output += """
    An email has been sent to the owner of the account.""" else: output += """
    Could not send an email to the owner of the account.""" elif res[0][2] in [1, "1"]: res2 = run_sql("UPDATE user SET note=0 WHERE id=%s", (userID, )) output += """The account '%s' has been set inactive.""" % res[0][1] else: output += 'The account id given does not exist.' body = [output] if callback: return perform_modifyaccounts(req, email_user_pattern, limit_to, maxpage, page, content=output, callback='yes') else: return addadminbox(subtitle, body) def perform_editaccount(req, userID, mtype='', content='', callback='yes', confirm=-1): """form to modify an account. this method is calling other methods which again is calling this and sending back the output of the method. if callback, the method will call perform_editcollection, if not, it will just return its output. userID - id of the user mtype - the method that called this method. content - the output from that method.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) res = run_sql("SELECT id, email FROM user WHERE id=%s", (userID, )) if not res: if mtype == "perform_deleteaccount": text = """The selected account has been deleted, to continue editing, go back to 'Manage Accounts'.""" if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION == 1: text += """
An email has been sent to the owner of the account.""" else: text = """The selected account does not exist, please go back and select an account to edit.""" return index(req=req, title='Edit Account', subtitle="Edit account", body=[text], adminarea=7, authorized=1) fin_output = """ +
    Menu
    0. Show all 1. Modify login-data 2. Modify preferences
3. Delete account 4. Edit REST API Key
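# The "Edit REST API Key" entry leads to perform_modifyapikeydata() below,
# which lists the rows of the apikey table owned by this account and lets the
# admin change each key's status.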
    - """ % (CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID) + """ % (CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID, CFG_SITE_SECURE_URL, userID) if mtype == "perform_modifylogindata" and content: fin_output += content elif mtype == "perform_modifylogindata" or not mtype: fin_output += perform_modifylogindata(req, userID, callback='') if mtype == "perform_modifypreferences" and content: fin_output += content elif mtype == "perform_modifypreferences" or not mtype: fin_output += perform_modifypreferences(req, userID, callback='') if mtype == "perform_deleteaccount" and content: fin_output += content elif mtype == "perform_deleteaccount" or not mtype: fin_output += perform_deleteaccount(req, userID, callback='') + if mtype == "perform_modifyapikeydata" and content: + fin_output += content + elif mtype == "perform_modifyapikeydata" or not mtype: + fin_output += perform_modifyapikeydata(req, userID, callback='') + return index(req=req, title='Edit Account', subtitle="Edit account '%s'" % res[0][1], body=[fin_output], adminarea=7, authorized=1) def perform_becomeuser(req, userID='', callback='yes', confirm=0): """modify email and password of an account""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """5. Became user.   [?]""" % CFG_SITE_SECURE_URL res = run_sql("SELECT email FROM user WHERE id=%s", (userID, )) output = "" if res: update_Uid(req, res[0][0]) redirect_to_url(req, CFG_SITE_SECURE_URL) else: output += 'The account id given does not exist.' body = [output] if callback: return perform_editaccount(req, userID, mtype='perform_becomeuser', content=addadminbox(subtitle, body), callback='yes') else: return addadminbox(subtitle, body) def perform_modifylogindata(req, userID, nickname='', email='', password='', callback='yes', confirm=0): """modify email and password of an account""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """1. Edit login-data.   [?]""" % CFG_SITE_SECURE_URL res = run_sql("SELECT id, email, nickname FROM user WHERE id=%s", (userID, )) output = "" if res: if not email and not password: email = res[0][1] nickname = res[0][2] text = ' Account id:%s
\n' % userID text += ' Nickname:\n' text += '
    ' % (nickname, ) text += ' Email:\n' text += '
    ' % (email, ) text += ' Password:\n' text += '
' % (password, ) output += createhiddenform(action="modifylogindata", text=text, userID=userID, confirm=1, button="Modify") if confirm in [1, "1"] and email and email_valid_p(email): res = run_sql("SELECT nickname FROM user WHERE nickname=%s AND id<>%s", (nickname, userID)) if res: output += 'Sorry, the specified nickname is already used.' else: res = run_sql("UPDATE user SET email=%s WHERE id=%s", (email, userID)) if password: res = run_sql("UPDATE user SET password=AES_ENCRYPT(email,%s) WHERE id=%s", (password, userID)) else: output += 'Password not modified. ' res = run_sql("UPDATE user SET nickname=%s WHERE id=%s", (nickname, userID)) output += 'Nickname/email and/or password modified.' elif confirm in [1, "1"]: output += 'Please specify a valid email address.' else: output += 'The account id given does not exist.' body = [output] if callback: return perform_editaccount(req, userID, mtype='perform_modifylogindata', content=addadminbox(subtitle, body), callback='yes') else: return addadminbox(subtitle, body) def perform_modifypreferences(req, userID, login_method='', callback='yes', confirm=0): """modify the default login method of an account""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """2. Modify preferences.   [?]""" % CFG_SITE_SECURE_URL res = run_sql("SELECT id, email FROM user WHERE id=%s", (userID, )) output = "" if res: user_pref = get_user_preferences(userID) if confirm in [1, "1"]: if login_method: user_pref['login_method'] = login_method set_user_preferences(userID, user_pref) output += "Select default login method:
    " text = "" methods = CFG_EXTERNAL_AUTHENTICATION.keys() methods.sort() for system in methods: text += """%s
    """ % (system, (user_pref['login_method'] == system and "checked" or ""), system) output += createhiddenform(action="modifypreferences", text=text, confirm=1, userID=userID, button="Select") if confirm in [1, "1"]: if login_method: output += """The login method has been changed""" else: output += """Nothing to update""" else: output += 'The account id given does not exist.' body = [output] if callback: return perform_editaccount(req, userID, mtype='perform_modifypreferences', content=addadminbox(subtitle, body), callback='yes') else: return addadminbox(subtitle, body) def perform_deleteaccount(req, userID, callback='yes', confirm=0): """delete account""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """3. Delete account.   [?]""" % CFG_SITE_SECURE_URL res = run_sql("SELECT id, email FROM user WHERE id=%s", (userID, )) output = "" if res: if confirm in [0, "0"]: text = 'Are you sure you want to delete the account with email: "%s"?' % res[0][1] output += createhiddenform(action="deleteaccount", text=text, userID=userID, confirm=1, button="Delete") elif confirm in [1, "1"]: res2 = run_sql("DELETE FROM user WHERE id=%s", (userID, )) output += 'Account deleted.' if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION == 1: emailsent = send_account_deleted_message(res[0][1], res[0][1]) else: output += 'The account id given does not exist.' body = [output] if callback: return perform_editaccount(req, userID, mtype='perform_deleteaccount', content=addadminbox(subtitle, body), callback='yes') else: return addadminbox(subtitle, body) +def perform_modifyapikeydata(req, userID, keyID='', status='' , callback='yes', confirm=0): + """modify REST API keys of an account""" + + (auth_code, auth_message) = is_adminuser(req) + if auth_code != 0: return mustloginpage(req, auth_message) + + subtitle = """4. Edit REST API Keys.   [?]""" % CFG_SITE_SECURE_URL + + if confirm in [1, "1"]: + run_sql("UPDATE apikey SET status=%s WHERE id=%s", (status, keyID)) + + res = run_sql("SELECT id, description, status FROM apikey WHERE id_user=%s", (userID, )) + output = "" + if res: + for key_info in res: + text = '' + text += ' Key: %s
    \n' % key_info[0] + text += ' ' % key_info[0] + text += ' Description: %s
    \n' % key_info[1] + text += '
    \n' + if key_info[0] == keyID: + text += 'Key status modified' + output += createhiddenform(action="modifyapikeydata", + text=text, + userID=userID, + confirm=1, + button="Modify") + else: + output += 'The account id given does not have REST API Keys.' + + body = [output] + + if callback: + return perform_editaccount(req, userID, mtype='perform_modifyapikeydata', content=addadminbox(subtitle, body), callback='yes') + else: + return addadminbox(subtitle, body) + def perform_rejectaccount(req, userID, email_user_pattern, limit_to, maxpage, page, callback='yes', confirm=0): """Delete account and send an email to the owner.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) res = run_sql("SELECT id, email, note FROM user WHERE id=%s", (userID, )) output = "" subtitle = "" if res: res2 = run_sql("DELETE FROM user WHERE id=%s", (userID, )) output += 'Account rejected and deleted.' if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_DELETION == 1: if not res[0][2] or res[0][2] == "0": emailsent = send_account_rejected_message(res[0][1], res[0][1]) elif res[0][2] == "1": emailsent = send_account_deleted_message(res[0][1], res[0][1]) if emailsent: output += """
    An email has been sent to the owner of the account.""" else: output += """
    Could not send an email to the owner of the account.""" else: output += 'The account id given does not exist.' body = [output] if callback: return perform_modifyaccounts(req, email_user_pattern, limit_to, maxpage, page, content=output, callback='yes') else: return addadminbox(subtitle, body) def perform_modifyaccounts(req, email_user_pattern='', limit_to=-1, maxpage=MAXPAGEUSERS, page=1, content='', callback='yes', confirm=0): """Modify default behaviour of a guest user or if new accounts should automatically/manually be modified.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) subtitle = """4. Edit accounts.   [?]""" % CFG_SITE_SECURE_URL output = "" # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) try: maxpage = int(maxpage) except: maxpage = MAXPAGEUSERS try: page = int(page) if page < 1: page = 1 except: page = 1 text = ' Email (part of):\n' text += '
    ' % (email_user_pattern, ) text += """Limit to:
    """ % ((limit_to=="all" and "selected" or ""), (limit_to=="enabled" and "selected" or ""), (limit_to=="disabled" and "selected" or "")) text += """Accounts per page:
    """ % ((maxpage==25 and "selected" or ""), (maxpage==50 and "selected" or ""), (maxpage==100 and "selected" or ""), (maxpage==250 and "selected" or ""), (maxpage==500 and "selected" or ""), (maxpage==1000 and "selected" or "")) output += createhiddenform(action="modifyaccounts", text=text, button="search for accounts") if limit_to not in [-1, "-1"] and maxpage: options = [] users1 = "SELECT id,email,note FROM user WHERE " if limit_to == "enabled": users1 += " email!='' AND note=1" elif limit_to == "disabled": users1 += " email!='' AND note=0 OR note IS NULL" elif limit_to == "guest": users1 += " email=''" else: users1 += " email!=''" if email_user_pattern: users1 += " AND email RLIKE %s" options += [email_user_pattern] users1 += " ORDER BY email LIMIT %s" options += [maxpage * page + 1] try: users1 = run_sql(users1, tuple(options)) except OperationalError: users1 = () if not users1: output += 'There are no accounts matching the email given.' else: users = [] if maxpage * (page - 1) > len(users1): page = len(users1) / maxpage + 1 for (id, email, note) in users1[maxpage * (page - 1):(maxpage * page)]: users.append(['', id, email, (note=="1" and 'Active' or 'Inactive')]) for col in [(((note=="1" and 'Inactivate' or 'Activate'), 'modifyaccountstatus'), ((note == "0" and 'Reject' or 'Delete'), 'rejectaccount'), ), (('Edit account', 'editaccount'), ),]: users[-1].append('%s' % (col[0][1], id, email_user_pattern, limit_to, maxpage, page, random.randint(0, 1000), col[0][0])) for (str, function) in col[1:]: users[-1][-1] += ' / %s' % (function, id, email_user_pattern, limit_to, maxpage, page, random.randint(0, 1000), str) users[-1].append('%s' % ('becomeuser', id, email_user_pattern, limit_to, maxpage, page, random.randint(0, 1000), 'Become user')) last = "" next = "" if len(users1) > maxpage: if page > 1: last += 'Last Page' % (email_user_pattern, limit_to, maxpage, (page - 1)) if len(users1[maxpage * (page - 1):(maxpage * page)]) == maxpage: next += 'Next page' % (email_user_pattern, limit_to, maxpage, (page + 1)) output += 'Showing accounts %s-%s:' % (1 + maxpage * (page - 1), maxpage * page) else: output += '%s matching account(s):' % len(users1) output += tupletotable(header=[last, 'id', 'email', 'Status', '', '', next], tuple=users) else: output += 'Please select which accounts to find and how many to show per page.' if content: output += "
    %s" % content body = [output] if callback: return perform_manageaccounts(req, "perform_modifyaccounts", addadminbox(subtitle, body)) else: return addadminbox(subtitle, body) def perform_delegate_startarea(req): """start area for lower level delegation of rights.""" # refuse access to guest users: uid = getUid(req) if isGuestUser(uid): return index(req=req, title='Delegate Rights', adminarea=0, authorized=0) subtitle = 'select what to do' output = '' if is_adminuser(req)[0] == 0: output += """

You are also allowed to be in the Main Admin Area, which gives you
access to the full functionality of WebAccess.

    """ output += """
    Connect users to roles
    add users to the roles you have delegation rights to.
    Remove users from roles
    remove users from the roles you have delegation rights to.
    Set up delegation rights
    specialized area to set up the delegation rights used in the areas above.
    you need to be a web administrator to access the area.
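# Delegation model: a role can be managed here only if the current admin's own
# role holds a DELEGATEADDUSERROLE authorization naming it; the connect/remove
# areas above build their role lists via acca.acc_find_possible_actions_user().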
    """ return index(req=req, title='Delegate Rights', subtitle=subtitle, body=[output], adminarea=0, authorized=1) def perform_delegate_adminsetup(req, id_role_admin=0, id_role_delegate=0, confirm=0): """lets the webadmins set up the delegation rights for the other roles id_role_admin - the role to be given delegation rights id_role_delegate - the role over which the delegation rights are given confirm - make the connection happen """ subtitle = 'step 1 - select admin role' admin_roles = acca.acc_get_all_roles() output = """

This is a specialized area to handle a task that can also be handled
    from the "add authorization" interface.

    By handling the delegation rights here you get the advantage of
    not having to select the correct action (%s) or
    remembering the names of available roles.
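# Under the hood a delegation right is an ordinary authorization; step 4 of
# this page boils down to the call used below:
#   acca.acc_add_authorization(name_role=name_role_admin,
#                              name_action=DELEGATEADDUSERROLE,
#                              optional=0, role=name_role_delegate)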

    """ % (DELEGATEADDUSERROLE, ) output += createroleselect(id_role=id_role_admin, step=1, button='select admin role', name='id_role_admin', action='delegate_adminsetup', roles=admin_roles) if str(id_role_admin) != '0': subtitle = 'step 2 - select delegate role' name_role_admin = acca.acc_get_role_name(id_role=id_role_admin) delegate_roles_old = acca.acc_find_delegated_roles(id_role_admin=id_role_admin) delegate_roles = [] delegate_roles_old_names = [] for role in admin_roles: if (role,) not in delegate_roles_old: delegate_roles.append(role) else: delegate_roles_old_names.append(role[1]) if delegate_roles_old_names: delegate_roles_old_names.sort() names_str = '' for name in delegate_roles_old_names: if names_str: names_str += ', ' names_str += name output += '

    previously selected roles: %s.

    ' % (names_str, ) extra = """
    Remove delegated roles
    use the standard administration area to remove delegation rights you no longer want to be available.
    """ % (id_role_admin, acca.acc_get_action_id(name_action=DELEGATEADDUSERROLE)) else: output += '

    no previously selected roles.

    ' output += createroleselect(id_role=id_role_delegate, step=2, button='select delegate role', name='id_role_delegate', action='delegate_adminsetup', roles=delegate_roles, id_role_admin=id_role_admin) if str(id_role_delegate) != '0': subtitle = 'step 3 - confirm to add delegation right' name_role_delegate = acca.acc_get_role_name(id_role=id_role_delegate) output += """

    Warning: don't hand out delegation rights that can harm the system (e.g. delegating superrole).

    """ output += createhiddenform(action="delegate_adminsetup", text='let role %s delegate rights over role %s?' % (name_role_admin, name_role_delegate), id_role_admin=id_role_admin, id_role_delegate=id_role_delegate, confirm=1) if int(confirm): subtitle = 'step 4 - confirm delegation right added' # res1 = acca.acc_add_role_action_arguments_names(name_role=name_role_admin, # name_action=DELEGATEADDUSERROLE, # arglistid=-1, # optional=0, # role=name_role_delegate) res1 = acca.acc_add_authorization(name_role=name_role_admin, name_action=DELEGATEADDUSERROLE, optional=0, role=name_role_delegate) if res1: output += '

    confirm: role %s delegates role %s.' % (name_role_admin, name_role_delegate) else: output += '

    sorry, delegation right could not be added,
    it probably already exists.

    ' # see if right hand menu is available try: body = [output, extra] except NameError: body = [output] return index(req=req, title='Delegate Rights', subtitle=subtitle, body=body, adminarea=1) def perform_delegate_adduserrole(req, id_role=0, email_user_pattern='', id_user=0, confirm=0): """let a lower level web admin add users to a limited set of roles. id_role - the role to connect to a user id_user - the user to connect to a role confirm - make the connection happen """ # finding the allowed roles for this user id_admin = getUid(req) id_action = acca.acc_get_action_id(name_action=DELEGATEADDUSERROLE) actions = acca.acc_find_possible_actions_user(id_user=id_admin, id_action=id_action) allowed_roles = [] allowed_id_roles = [] for (id, arglistid, name_role_help) in actions[1:]: id_role_help = acca.acc_get_role_id(name_role=name_role_help) if id_role_help and [id_role_help, name_role_help, ''] not in allowed_roles: allowed_roles.append([id_role_help, name_role_help, '']) allowed_id_roles.append(str(id_role_help)) output = '' if not allowed_roles: subtitle = 'no delegation rights' output += """

You do not have delegation rights over any roles.
    If you think you should have such rights, contact a WebAccess Administrator.

    """ extra = '' else: subtitle = 'step 1 - select role' output += """

    Lower level delegation of access rights to roles.
An administrator with all rights has to give you these rights.
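# The search below matches the cleaned pattern against user.email with SQL
# RLIKE and gives up beyond MAXSELECTUSERS hits; users already connected to the
# role arrive via a second query and are listed in brackets with negated ids,
# so selecting one produces the "already attached" hint instead of a duplicate
# connection.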

    """ email_out = acca.acc_get_user_email(id_user=id_user) name_role = acca.acc_get_role_name(id_role=id_role) output += createroleselect(id_role=id_role, step=1, name='id_role', action='delegate_adduserrole', roles=allowed_roles) if str(id_role) != '0' and str(id_role) in allowed_id_roles: subtitle = 'step 2 - search for users' # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) text = ' 2. search for user \n' text += ' \n' % (email_user_pattern, ) output += createhiddenform(action="delegate_adduserrole", text=text, button="search for users", id_role=id_role) # pattern is entered if email_user_pattern: # users with matching email-address try: users1 = run_sql("""SELECT id, email FROM user WHERE email<>'' AND email RLIKE %s ORDER BY email """, (email_user_pattern, )) except OperationalError: users1 = () # users that are connected try: users2 = run_sql("""SELECT DISTINCT u.id, u.email FROM user u LEFT JOIN user_accROLE ur ON u.id = ur.id_user WHERE ur.id_accROLE = %s AND u.email RLIKE %s ORDER BY u.email """, (id_role, email_user_pattern)) except OperationalError: users2 = () # no users that match the pattern if not (users1 or users2): output += '

    no qualified users, try new search.

    ' # too many matching users elif len(users1) > MAXSELECTUSERS: output += '

%s hits, too many qualified users, please specify a narrower search. (limit %s)

    ' % (len(users1), MAXSELECTUSERS) # show matching users else: subtitle = 'step 3 - select a user' users = [] extrausers = [] for (id, email) in users1: if (id, email) not in users2: users.append([id,email,'']) for (id, email) in users2: extrausers.append([-id, email,'']) output += createuserselect(id_user=id_user, action="delegate_adduserrole", step=3, users=users, extrausers=extrausers, button="add this user", id_role=id_role, email_user_pattern=email_user_pattern) try: id_user = int(id_user) except ValueError: pass # user selected already connected to role if id_user < 0: output += '

    users in brackets are already attached to the role, try another one...

    ' # a user is selected elif email_out: subtitle = "step 4 - confirm to add user" output += createhiddenform(action="delegate_adduserrole", text='add user %s to role %s?' % (email_out, name_role), id_role=id_role, email_user_pattern=email_user_pattern, id_user=id_user, confirm=1) # it is confirmed that this user should be added if confirm: # add user result = acca.acc_add_user_role(id_user=id_user, id_role=id_role) if result and result[2]: subtitle = 'step 5 - confirm user added' output += '

    confirm: user %s added to role %s.

    ' % (email_out, name_role) else: subtitle = 'step 5 - user could not be added' output += '

    sorry, but user could not be added.

    ' extra = """
    Remove users from role
remove users from the roles you have delegation rights to.
    """ % (id_role, ) return index(req=req, title='Connect users to roles', subtitle=subtitle, body=[output, extra], adminarea=1, authorized=1) def perform_delegate_deleteuserrole(req, id_role=0, id_user=0, confirm=0): """let a lower level web admin remove users from a limited set of roles. id_role - the role to connect to a user id_user - the user to connect to a role confirm - make the connection happen """ subtitle = 'in progress...' output = '

    in progress...

    ' # finding the allowed roles for this user id_admin = getUid(req) id_action = acca.acc_get_action_id(name_action=DELEGATEADDUSERROLE) actions = acca.acc_find_possible_actions_user(id_user=id_admin, id_action=id_action) output = '' if not actions: subtitle = 'no delegation rights' output += """

You do not have delegation rights over any roles.
    If you think you should have such rights, contact a WebAccess Administrator.

    """ extra = '' else: subtitle = 'step 1 - select role' output += """

    Lower level delegation of access rights to roles.
An administrator with all rights has to give you these rights.

    """ email_out = acca.acc_get_user_email(id_user=id_user) name_role = acca.acc_get_role_name(id_role=id_role) # create list of allowed roles allowed_roles = [] allowed_id_roles = [] for (id, arglistid, name_role_help) in actions[1:]: id_role_help = acca.acc_get_role_id(name_role=name_role_help) if id_role_help and [id_role_help, name_role_help, ''] not in allowed_roles: allowed_roles.append([id_role_help, name_role_help, '']) allowed_id_roles.append(str(id_role_help)) output += createroleselect(id_role=id_role, step=1, action='delegate_deleteuserrole', roles=allowed_roles) if str(id_role) != '0' and str(id_role) in allowed_id_roles: subtitle = 'step 2 - select user' users = acca.acc_get_role_users(id_role) output += createuserselect(id_user=id_user, step=2, action='delegate_deleteuserrole', users=users, id_role=id_role) if str(id_user) != '0': subtitle = 'step 3 - confirm delete of user' email_user = acca.acc_get_user_email(id_user=id_user) output += createhiddenform(action="delegate_deleteuserrole", text='delete user %s from %s?' % (headerstrong(user=id_user), headerstrong(role=id_role)), id_role=id_role, id_user=id_user, confirm=1) if confirm: res = acca.acc_delete_user_role(id_user=id_user, id_role=id_role) if res: subtitle = 'step 4 - confirm user deleted from role' output += '

    confirm: deleted user %s from role %s.

    ' % (email_user, name_role) else: subtitle = 'step 4 - user could not be deleted' output += 'sorry, but user could not be deleted
the user has probably already been deleted.' extra = """
    Connect users to role
add users to the roles you have delegation rights to.
    """ % (id_role, ) return index(req=req, title='Remove users from roles', subtitle=subtitle, body=[output, extra], adminarea=1, authorized=1) def perform_showactiondetails(req, id_action): """show the details of an action. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) output = createactionselect(id_action=id_action, action="showactiondetails", step=1, actions=acca.acc_get_all_actions(), button="select action") if id_action not in [0, '0']: output += actiondetails(id_action=id_action) extra = """
    Add new authorization
    add an authorization.
    Modify authorizations
    modify existing authorizations.
    Remove role
remove all authorizations between the action and a role.
    """ % (id_action, id_action, id_action) body = [output, extra] else: output += '

    no details to show

    ' body = [output] return index(req=req, title='Show Action Details', subtitle='show action details', body=body, adminarea=4) def actiondetails(id_action=0): """show details of given action. """ output = '' if id_action not in [0, '0']: name_action = acca.acc_get_action_name(id_action=id_action) output += '

    action details:

    ' output += tupletotable(header=['id', 'name', 'description', 'allowedkeywords', 'optional'], tuple=[acca.acc_get_action_details(id_action=id_action)]) roleshlp = acca.acc_get_action_roles(id_action=id_action) if roleshlp: roles = [] for (id, name, dummy) in roleshlp: res = acca.acc_find_possible_actions(id, id_action) if res: authorization_details = tupletotable(header=res[0], tuple=res[1:]) else: authorization_details = 'no details to show' roles.append([id, name, authorization_details, 'show connected users' % (id, )]) roletable = tupletotable(header=['id', 'name', 'authorization details', ''], tuple=roles) output += '

    roles connected to %s:

    \n' % (headerstrong(action=name_action, query=0), ) output += roletable else: output += '

    no roles connected to %s.

    \n' % (headerstrong(action=name_action, query=0), ) else: output += '

    no details to show

    ' return output def perform_addrole(req, id_role=0, name_role='', description='put description here.', firerole_def_src=CFG_ACC_EMPTY_ROLE_DEFINITION_SRC, confirm=0): """form to add a new role with these values: name_role - name of the new role description - optional description of the role """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) name_role = cleanstring(name_role) title='Add Role' subtitle = 'step 1 - give values to the requested fields' output = """
    role name
    description
    firewall like role definition [?]
    See the list of groups for a hint about which group names you can use.
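# FireRole definitions are compiled and serialized before storage; a minimal
# sketch with the helpers used by this module:
#   firerole_def_ser = serialize(compile_role_definition('deny any'))
# ('deny any' is the default definition source used elsewhere in this file.)
# Compile errors raise InvenioWebAccessFireroleError and abort the add.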
    """ % (escape(name_role, '"'), escape(description), escape(firerole_def_src)) if name_role: # description must be changed before submitting subtitle = 'step 2 - confirm to add role' internaldesc = '' if description != 'put description here.': internaldesc = description try: firerole_def_ser = serialize(compile_role_definition(firerole_def_src)) except InvenioWebAccessFireroleError, msg: output += "%s" % msg else: text = """ add role with:
    \n name: %s
    """ % (name_role, ) if internaldesc: text += 'description: %s?\n' % (description, ) output += createhiddenform(action="addrole", text=text, name_role=escape(name_role, '"'), description=escape(description, '"'), firerole_def_src=escape(firerole_def_src, '"'), confirm=1) if confirm not in ["0", 0]: result = acca.acc_add_role(name_role=name_role, description=internaldesc, firerole_def_ser=firerole_def_ser, firerole_def_src=firerole_def_src) if result: subtitle = 'step 3 - role added' output += '

    role added:

    ' result = list(result) result[3] = result[3].replace('\n', '
    ') result = tuple(result) output += tupletotable(header=['id', 'role name', 'description', 'firewall like role definition'], tuple=[result]) else: subtitle = 'step 3 - role could not be added' output += '

    sorry, could not add role,
a role with the same name probably exists.

    ' id_role = acca.acc_get_role_id(name_role=name_role) extra = """
    Add authorization
    start adding new authorizations to role %s.
    Connect user
    connect a user to role %s.
    """ % (id_role, name_role, id_role, name_role) try: body = [output, extra] except NameError: body = [output] return index(req=req, title=title, body=body, subtitle=subtitle, adminarea=3) def perform_modifyrole(req, id_role='0', name_role='', description='put description here.', firerole_def_src='', modified='0', confirm=0): """form to add a new role with these values: name_role - name of the role to be changed description - optional description of the role firerole_def_src - optional firerole like definition of the role """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) ret = acca.acc_get_role_details(id_role) if ret and modified =='0': name_role = ret[1] description = ret[2] firerole_def_src = ret[3] if not firerole_def_src or firerole_def_src == '' or firerole_def_src is None: firerole_def_src = 'deny any' name_role = cleanstring(name_role) title='Modify Role' subtitle = 'step 1 - give values to the requested fields and confirm to modify role' output = """
    role name
    description
    firewall like role definition [?]
    See the list of groups for a hint about which group names you can use.
    """ % (id_role, escape(name_role), escape(description), escape(firerole_def_src)) if modified in [1, '1']: # description must be changed before submitting internaldesc = '' if description != 'put description here.': internaldesc = description text = """ modify role with:
    \n name: %s
    """ % (name_role, ) if internaldesc: text += 'description: %s?
    ' % (description, ) text += 'firewall like role definition: %s' % firerole_def_src.replace('\n', '
    ') try: firerole_def_ser = serialize(compile_role_definition(firerole_def_src)) except InvenioWebAccessFireroleError, msg: subtitle = 'step 2 - role could not be modified' output += '

sorry, could not modify role because of problems with its definition:
    %s

    ' % msg else: output += createhiddenform(action="modifyrole", text=text, id_role = id_role, name_role=escape(name_role, True), description=escape(description, True), firerole_def_src=escape(firerole_def_src, True), modified=1, confirm=1) if confirm not in ["0", 0]: result = acca.acc_update_role(id_role, name_role=name_role, description=internaldesc, firerole_def_ser=firerole_def_ser, firerole_def_src=firerole_def_src) if result: subtitle = 'step 2 - role modified' output += '

    role modified:

    ' output += tupletotable(header=['id', 'role name', 'description', 'firewall like role definition'], tuple=[(id_role, name_role, description, firerole_def_src.replace('\n', '
    '))]) else: subtitle = 'step 2 - role could not be modified' output += '

    sorry, could not modify role,
    please contact the administrator.

    ' body = [output] return index(req=req, title=title, body=body, subtitle=subtitle, adminarea=3) def perform_deleterole(req, id_role="0", confirm=0): """select a role and show all connected information, users - users that can access the role. actions - actions with possible authorizations.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) title = 'Delete role' subtitle = 'step 1 - select role to delete' name_role = acca.acc_get_role_name(id_role=id_role) output = createroleselect(id_role=id_role, action="deleterole", step=1, roles=acca.acc_get_all_roles(), button="delete role") if id_role != "0" and name_role: subtitle = 'step 2 - confirm delete of role' output += roledetails(id_role=id_role) output += createhiddenform(action="deleterole", text='delete role %s and all connections?' % (name_role, ), id_role=id_role, confirm=1) if confirm: res = acca.acc_delete_role(id_role=id_role) subtitle = 'step 3 - confirm role deleted' if res: output += "

    confirm: role %s deleted.
    " % (name_role, ) output += "%s entries were removed.

    " % (res, ) else: output += "

    sorry, the role could not be deleted.

    " elif id_role != "0": output += '

    the role has been deleted...

    ' return index(req=req, title=title, subtitle=subtitle, body=[output], adminarea=3) def perform_showroledetails(req, id_role): """show the details of a role.""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) output = createroleselect(id_role=id_role, action="showroledetails", step=1, roles=acca.acc_get_all_roles(), button="select role") if id_role not in [0, '0']: name_role = acca.acc_get_role_name(id_role=id_role) output += roledetails(id_role=id_role) extra = """
    Modify role
modify the role you are viewing.
    Add new authorization
    add an authorization.
    Modify authorizations
    modify existing authorizations.
    Connect user
    connect a user to role %(name_role)s.
    Remove user
    remove a user from role %(name_role)s.
    """ % {'id_role' : id_role, 'name_role' : name_role} body = [output, extra] else: output += '

    no details to show

    ' body = [output] return index(req=req, title='Show Role Details', subtitle='show role details', body=body, adminarea=3) def roledetails(id_role=0): """create the string to show details about a role. """ name_role = acca.acc_get_role_name(id_role=id_role) usershlp = acca.acc_get_role_users(id_role) users = [] for (id, email, dummy) in usershlp: users.append([id, email, 'show user details' % (id, )]) usertable = tupletotable(header=['id', 'email'], tuple=users, highlight_rows_p=True, alternate_row_colors_p=True) actionshlp = acca.acc_get_role_actions(id_role) actions = [] for (action_id, name, dummy) in actionshlp: res = acca.acc_find_possible_actions(id_role, action_id) if res: authorization_details = tupletotable(header=res[0], tuple=res[1:]) else: authorization_details = 'no details to show' actions.append([action_id, name, authorization_details, 'show action details' % (id_role, action_id)]) actiontable = tupletotable(header=['id', 'name', 'parameters', ''], tuple=actions) # show role details details = '

    role details:

    ' role_details = acca.acc_get_role_details(id_role=id_role) if role_details[3] is None: role_details[3] = '' role_details[3] = role_details[3].replace('\n', '
    ') # Hack for preformatting firerole rules details += tupletotable(header=['id', 'name', 'description', 'firewall like role definition'], tuple=[role_details]) # show connected users details += '

    users connected to %s:

    ' % (headerstrong(role=name_role, query=0), ) if users: details += usertable else: details += '

    no users connected.

    ' # show connected authorizations details += '

    authorizations for %s:

    ' % (headerstrong(role=name_role, query=0), ) if actions: details += actiontable else: details += '

    no authorizations connected

    ' return details def perform_adduserrole(req, id_role='0', email_user_pattern='', id_user='0', confirm=0): """create connection between user and role. id_role - id of the role to add user to email_user_pattern - search for users using this pattern id_user - id of user to add to the role. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) email_out = acca.acc_get_user_email(id_user=id_user) name_role = acca.acc_get_role_name(id_role=id_role) title = 'Connect user to role ' subtitle = 'step 1 - select a role' output = createroleselect(id_role=id_role, action="adduserrole", step=1, roles=acca.acc_get_all_roles()) # role is selected if id_role != "0": title += name_role subtitle = 'step 2 - search for users' # remove letters not allowed in an email email_user_pattern = cleanstring_email(email_user_pattern) text = ' 2. search for user \n' text += ' \n' % (email_user_pattern, ) output += createhiddenform(action="adduserrole", text=text, button="search for users", id_role=id_role) # pattern is entered if email_user_pattern: # users with matching email-address try: users1 = run_sql("""SELECT id, email FROM user WHERE email<>'' AND email RLIKE %s ORDER BY email """, (email_user_pattern, )) except OperationalError: users1 = () # users that are connected try: users2 = run_sql("""SELECT DISTINCT u.id, u.email FROM user u LEFT JOIN user_accROLE ur ON u.id = ur.id_user WHERE ur.id_accROLE = %s AND u.email RLIKE %s ORDER BY u.email """, (id_role, email_user_pattern)) except OperationalError: users2 = () # no users that match the pattern if not (users1 or users2): output += '
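# Editorial sketch: the user search above whitelists the pattern through
# cleanstring_email() and then binds it as a query parameter for MySQL's
# RLIKE, never interpolating it into the SQL text. The same idea on a
# bare DB-API cursor (table layout assumed from the query above):

import re

def search_users_sketch(cursor, email_user_pattern):
    """Illustrative only: whitelist the pattern, bind it as a parameter."""
    pattern = re.sub(r'[^a-zA-Z0-9_.@-]', '', email_user_pattern)
    try:
        cursor.execute("""SELECT id, email FROM user
                          WHERE email <> '' AND email RLIKE %s
                          ORDER BY email""", (pattern, ))
        return cursor.fetchall()
    except Exception:
        # webaccessadmin_lib narrows this to OperationalError, returns ()
        return ()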

    no qualified users, try new search.

    ' elif len(users1) > MAXSELECTUSERS: output += '

    %s hits, too many qualified users, specify more narrow search. (limit %s)

    ' % (len(users1), MAXSELECTUSERS) # show matching users else: subtitle = 'step 3 - select a user' users = [] extrausers = [] for (user_id, email) in users1: if (user_id, email) not in users2: users.append([user_id,email,'']) for (user_id, email) in users2: extrausers.append([-user_id, email,'']) output += createuserselect(id_user=id_user, action="adduserrole", step=3, users=users, extrausers=extrausers, button="add this user", id_role=id_role, email_user_pattern=email_user_pattern) try: id_user = int(id_user) except ValueError: pass # user selected already connected to role if id_user < 0: output += '

    users in brackets are already attached to the role, try another one...

    ' # a user is selected elif email_out: subtitle = "step 4 - confirm to add user" output += createhiddenform(action="adduserrole", text='add user %s to role %s?' % (email_out, name_role), id_role=id_role, email_user_pattern=email_user_pattern, id_user=id_user, confirm=1) # it is confirmed that this user should be added if confirm: # add user result = acca.acc_add_user_role(id_user=id_user, id_role=id_role) if result and result[2]: subtitle = 'step 5 - confirm user added' output += '

    confirm: user %s added to role %s.

    ' % (email_out, name_role) else: subtitle = 'step 5 - user could not be added' output += '

    sorry, but user could not be added.

    ' extra = """
    Create new role
    go here to add a new role.
    """ if str(id_role) != "0": extra += """
    Remove users
    remove users from role %s.
    Connected users
    show all users connected to role %s.
    Add authorization
    start adding new authorizations to role %s.
    """ % (id_role, name_role, id_role, name_role, id_role, name_role) return index(req=req, title=title, subtitle=subtitle, body=[output, extra], adminarea=3) def perform_addroleuser(req, email_user_pattern='', id_user='0', id_role='0', confirm=0): """delete connection between role and user. id_role - id of role to disconnect id_user - id of user to disconnect. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) email_out = acca.acc_get_user_email(id_user=id_user) name_role = acca.acc_get_role_name(id_role=id_role) # used to sort roles, and also to determine right side links con_roles = [] not_roles = [] title = 'Connect user to roles' subtitle = 'step 1 - search for users' # clean email search string email_user_pattern = cleanstring_email(email_user_pattern) text = ' 1. search for user \n' text += ' \n' % (email_user_pattern, ) output = createhiddenform(action='addroleuser', text=text, button='search for users', id_role=id_role) if email_user_pattern: subtitle = 'step 2 - select user' try: users1 = run_sql("""SELECT id, email FROM user WHERE email<>'' AND email RLIKE %s ORDER BY email """, (email_user_pattern, )) except OperationalError: users1 = () users = [] for (id, email) in users1: users.append([id, email, '']) # no users if not users: output += '

    no qualified users, try new search.

    ' # too many users elif len(users) > MAXSELECTUSERS: output += '

    %s hits, too many qualified users, specify more narrow search. (limit %s)

    ' % (len(users), MAXSELECTUSERS) # ok number of users else: output += createuserselect(id_user=id_user, action='addroleuser', step=2, users=users, button='select user', email_user_pattern=email_user_pattern) if int(id_user): subtitle = 'step 3 - select role' # roles the user is connected to role_ids = acca.acc_get_user_roles(id_user=id_user) # all the roles, lists are sorted on the background of these... all_roles = acca.acc_get_all_roles() # sort the roles in connected and not connected roles for (id, name, description, dummy, dummy) in all_roles: if id in role_ids: con_roles.append([-id, name, description]) else: not_roles.append([id, name, description]) # create roleselect output += createroleselect(id_role=id_role, action='addroleuser', step=3, roles=not_roles, extraroles=con_roles, extrastamp='(connected)', button='add this role', email_user_pattern=email_user_pattern, id_user=id_user) if int(id_role) < 0: name_role = acca.acc_get_role_name(id_role=-int(id_role)) output += '

    role %s already connected to the user, try another one...

    ' % (name_role, ) elif int(id_role): subtitle = 'step 4 - confirm to add role to user' output += createhiddenform(action='addroleuser', text='add role %s to user %s?' % (name_role, email_out), email_user_pattern=email_user_pattern, id_user=id_user, id_role=id_role, confirm=1) if confirm: # add role result = acca.acc_add_user_role(id_user=id_user, id_role=id_role) if result and result[2]: subtitle = 'step 5 - confirm role added' output += '

    confirm: role %s added to user %s.

    ' % (name_role, email_out) else: subtitle = 'step 5 - role could not be added' output += '

    sorry, but role could not be added

    ' extra = """
    Create new role
    go here to add a new role.
    """ if int(id_user) and con_roles: extra += """
    Remove roles
    disconnect roles from user %s.
    """ % (id_user, email_out) if int(id_role): if int(id_role) < 0: id_role = -int(id_role) extra += """
    Remove users
    disconnect users from role %s.
    """ % (id_role, name_role) return index(req=req, title=title, subtitle=subtitle, body=[output, extra], adminarea=5) def perform_deleteuserrole(req, id_role='0', id_user='0', reverse=0, confirm=0): """delete connection between role and user. id_role - id of role to disconnect id_user - id of user to disconnect. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) title = 'Remove user from role' email_user = acca.acc_get_user_email(id_user=id_user) name_role = acca.acc_get_role_name(id_role=id_role) output = '' if reverse in [0, '0']: adminarea = 3 subtitle = 'step 1 - select the role' output += createroleselect(id_role=id_role, action="deleteuserrole", step=1, roles=acca.acc_get_all_roles()) if id_role != "0": subtitle = 'step 2 - select the user' output += createuserselect(id_user=id_user, action="deleteuserrole", step=2, users=acca.acc_get_role_users(id_role=id_role), id_role=id_role) else: adminarea = 5 # show only if user is connected to a role, get users connected to roles users = run_sql("""SELECT DISTINCT(u.id), u.email, u.note FROM user u LEFT JOIN user_accROLE ur ON u.id = ur.id_user WHERE ur.id_accROLE != 'NULL' AND u.email != '' ORDER BY u.email """) has_roles = 1 # check if the user is connected to any roles for (id, email, note) in users: if str(id) == str(id_user): break # user not connected to a role else: subtitle = 'step 1 - user not connected' output += '

    no need to remove roles from user %s,
    user is not connected to any roles.

    ' % (email_user, ) has_roles, id_user = 0, '0' # stop the rest of the output below... # user connected to roles if has_roles: output += createuserselect(id_user=id_user, action="deleteuserrole", step=1, users=users, reverse=reverse) if id_user != "0": subtitle = 'step 2 - select the role' role_ids = acca.acc_get_user_roles(id_user=id_user) all_roles = acca.acc_get_all_roles() roles = [] for (id, name, desc, dummy, dummy) in all_roles: if id in role_ids: roles.append([id, name, desc]) output += createroleselect(id_role=id_role, action="deleteuserrole", step=2, roles=roles, id_user=id_user, reverse=reverse) if id_role != '0' and id_user != '0': subtitle = 'step 3 - confirm delete of user' output += createhiddenform(action="deleteuserrole", text='delete user %s from %s?' % (headerstrong(user=id_user), headerstrong(role=id_role)), id_role=id_role, id_user=id_user, reverse=reverse, confirm=1) if confirm: res = acca.acc_delete_user_role(id_user=id_user, id_role=id_role) if res: subtitle = 'step 4 - confirm delete of user' output += '

    confirm: deleted user %s from role %s.

    ' % (email_user, name_role)
            else:
                subtitle = 'step 4 - user could not be deleted'
                output += 'sorry, but user could not be deleted; ' \
                          'user is probably already deleted.'

    extra = ''
    if str(id_role) != "0":
        extra += """
    Connect user
    add users to role %s.
    """ % (id_role, name_role) if int(reverse): extra += """
    Remove user
    remove users from role %s.
    """ % (id_role, name_role) extra += '
    ' if str(id_user) != "0": extra += """
    Connect role
    add roles to user %s.
    """ % (email_user, id_user, email_user) if not int(reverse): extra += """
    Remove role
    remove roles from user %s.
    """ % (id_user, email_user, email_user) extra += '
    ' if extra: body = [output, extra] else: body = [output] return index(req=req, title=title, subtitle=subtitle, body=body, adminarea=adminarea) def perform_showuserdetails(req, id_user=0): """show the details of a user. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) if id_user not in [0, '0']: output = userdetails(id_user=id_user) email_user = acca.acc_get_user_email(id_user=id_user) extra = """
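# Editorial sketch: perform_addroleuser and perform_deleteuserrole both
# partition acc_get_all_roles() against acc_get_user_roles(), negating
# the id of already-connected roles so the select widget can mark them.
# The partition step alone (rows trimmed to three columns here; the real
# acc_get_all_roles() rows carry two extra fields):

def partition_roles_sketch(all_roles, connected_ids):
    """Illustrative only: split roles into connected / not connected."""
    con_roles, not_roles = [], []
    for (role_id, name, description) in all_roles:
        if role_id in connected_ids:
            # negative id means "already connected" to createroleselect
            con_roles.append([-role_id, name, description])
        else:
            not_roles.append([role_id, name, description])
    return con_roles, not_roles

# partition_roles_sketch([(1, 'admin', ''), (2, 'guest', '')], [2])
# -> ([[-2, 'guest', '']], [[1, 'admin', '']])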
    Connect role
    connect a role to user %s.
    Remove role
    remove a role from user %s.
    """ % (id_user, email_user, email_user, id_user, email_user) body = [output, extra] else: body = ['

    no details to show

    '] return index(req=req, title='Show User Details', subtitle='show user details', body=body, adminarea=5) def userdetails(id_user=0): """create the string to show details about a user. """ # find necessary details email_user = acca.acc_get_user_email(id_user=id_user) userroles = acca.acc_get_user_roles(id_user=id_user) conn_roles = [] # find connected roles for (id, name, desc, dummy, dummy) in acca.acc_get_all_roles(): if id in userroles: conn_roles.append([id, name, desc]) conn_roles[-1].append('show details' % (id, )) if conn_roles: # print details details = '

    roles connected to user %s

    ' % (email_user, ) details += tupletotable(header=['id', 'name', 'description', ''], tuple=conn_roles) else: details = '

    no roles connected to user %s.

    ' % (email_user, ) return details def perform_addauthorization(req, id_role="0", id_action="0", optional=0, reverse="0", confirm=0, **keywords): """ form to add new connection between user and role: id_role - role to connect id_action - action to connect reverse - role or action first? """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) # values that might get used name_role = acca.acc_get_role_name(id_role=id_role) or id_role name_action = acca.acc_get_action_name(id_action=id_action) or id_action optional = optional == 'on' and 1 or int(optional) extra = """
    Create new role
    go here to add a new role.
    """ # create the page according to which step the user is on # role -> action -> arguments if reverse in ["0", 0]: adminarea = 3 subtitle = 'step 1 - select role' output = createroleselect(id_role=id_role, action="addauthorization", step=1, roles=acca.acc_get_all_roles(), reverse=reverse) if str(id_role) != "0": subtitle = 'step 2 - select action' rolacts = acca.acc_get_role_actions(id_role) allhelp = acca.acc_get_all_actions() allacts = [] for r in allhelp: if r not in rolacts: allacts.append(r) output += createactionselect(id_action=id_action, action="addauthorization", step=2, actions=rolacts, extraactions=allacts, id_role=id_role, reverse=reverse) # action -> role -> arguments else: adminarea = 4 subtitle = 'step 1 - select action' output = createactionselect(id_action=id_action, action="addauthorization", step=1, actions=acca.acc_get_all_actions(), reverse=reverse) if str(id_action) != "0": subtitle = 'step 2 - select role' actroles = acca.acc_get_action_roles(id_action) allhelp = acca.acc_get_all_roles() allroles = [] for r in allhelp: if r not in actroles: allroles.append(r) output += createroleselect(id_role=id_role, action="addauthorization", step=2, roles=actroles, extraroles=allroles, id_action=id_action, reverse=reverse) # ready for step 3 no matter which direction we took to get here if id_action != "0" and id_role != "0": # links to adding authorizations in the other direction if str(reverse) == "0": extra += """
    Add authorization
    add authorizations to action %s.
    """ % (id_action, name_action) else: extra += """
    Add authorization
    add authorizations to role %s.
    """ % (id_role, name_role) subtitle = 'step 3 - enter values for the keywords\n' output += """
    """ % (id_role, id_action, reverse) # the actions argument keywords res_keys = acca.acc_get_action_keywords(id_action=id_action) # res used to display existing authorizations # res used to determine if showing "create connection without arguments" res_auths = acca.acc_find_possible_actions(id_role, id_action) if not res_keys: # action without arguments if not res_auths: output += """ create connection between %s?
    """ % (headerstrong(role=name_role, action=name_action, query=0), ) else: output += '

    connection without arguments is already created.

    ' else: # action with arguments optionalargs = acca.acc_get_action_is_optional(id_action=id_action) output += '3. authorized arguments
    ' if optionalargs: # optional arguments output += """

    connect %s to %s for any arguments
    connect %s to %s for only these argument cases:

    """ % (optional and 'checked="checked"' or '', name_role, name_action, not optional and 'checked="checked"' or '', name_role, name_action) # list the arguments allkeys = 1 for key in res_keys: output += '%s \n \n' output += '\n' # ask for confirmation if str(allkeys) != "0" or optional: keys = keywords.keys() keys.reverse() subtitle = 'step 4 - confirm add of authorization\n' text = """ create connection between
    %s
    """ % (headerstrong(role=name_role, action=name_action, query=0), ) if optional: text += 'withouth arguments' keywords = {} else: for key in keys: text += '%s: %s \n' % (escape(key), escape(keywords[key])) output += createhiddenform(action="addauthorization", text=text, id_role=id_role, id_action=id_action, reverse=reverse, confirm=1, optional=optional, **keywords) # show existing authorizations, found authorizations further up in the code... # res_auths = acca.acc_find_possible_actions(id_role, id_action) output += '

    existing authorizations:

    ' if res_auths: output += tupletotable(header=res_auths[0], tuple=res_auths[1:]) # shortcut to modifying authorizations extra += """
    Modify authorizations
    modify the existing authorizations.
    """ % (id_role, id_action, reverse) else: output += '

    no details to show

    ' # user confirmed to add entries if confirm: subtitle = 'step 5 - confirm authorization added' res1 = acca.acc_add_authorization(name_role=name_role, name_action=name_action, optional=optional, **keywords) if res1: res2 = acca.acc_find_possible_actions(id_role, id_action) arg = res1[0][3] # the arglistid new = [res2[0]] for row in res2[1:]: if int(row[0]) == int(arg): new.append(row) newauths = tupletotable(header=new[0], tuple=new[1:]) newentries = tupletotable(header=['role id', 'action id', 'argument id', '#'], tuple=res1) st = 'style="vertical-align: top"' output += """

    new authorization and entries:

    %s %s
    """ % (st, newauths, st, newentries) else: output += '

    sorry, authorization could not be added,
    it probably already exists

    ' # trying to put extra link on the right side try: body = [output, extra] except NameError: body = [output] return index(req=req, title = 'Create entry for new authorization', subtitle=subtitle, body=body, adminarea=adminarea) def perform_deleteroleaction(req, id_role="0", id_action="0", reverse=0, confirm=0): """delete all connections between a role and an action. id_role - id of the role id_action - id of the action reverse - 0: ask for role first 1: ask for action first""" (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) title = 'Remove action from role ' if reverse in ["0", 0]: # select role -> action adminarea = 3 subtitle = 'step 1 - select a role' output = createroleselect(id_role=id_role, action="deleteroleaction", step=1, roles=acca.acc_get_all_roles(), reverse=reverse) if id_role != "0": rolacts = acca.acc_get_role_actions(id_role=id_role) subtitle = 'step 2 - select the action' output += createactionselect(id_action=id_action, action="deleteroleaction", step=2, actions=rolacts, reverse=reverse, id_role=id_role, button="remove connection and all authorizations") else: # select action -> role adminarea = 4 subtitle = 'step 1 - select an action' output = createactionselect(id_action=id_action, action="deleteroleaction", step=1, actions=acca.acc_get_all_actions(), reverse=reverse) if id_action != "0": actroles = acca.acc_get_action_roles(id_action=id_action) subtitle = 'step 2 - select the role' output += createroleselect(id_role=id_role, action="deleteroleaction", step=2, roles=actroles, button="remove connection and all authorizations", id_action=id_action, reverse=reverse) if id_action != "0" and id_role != "0": subtitle = 'step 3 - confirm to remove authorizations' # ask for confirmation res = acca.acc_find_possible_actions(id_role, id_action) if res: output += '

    authorizations that will be deleted:

    ' output += tupletotable(header=res[0], tuple=res[1:]) output += createhiddenform(action="deleteroleaction", text='remove %s from %s' % (headerstrong(action=id_action), headerstrong(role=id_role)), confirm=1, id_role=id_role, id_action=id_action, reverse=reverse) else: output += 'no authorizations' # confirmation is given if confirm: subtitle = 'step 4 - confirm authorizations removed ' res = acca.acc_delete_role_action(id_role=id_role, id_action=id_action) if res: output += '

    confirm: removed %s from %s
    ' % (headerstrong(action=id_action), headerstrong(role=id_role)) output += '%s entries were removed.

    ' % (res, ) else: output += '

    sorry, no entries could be removed.

    ' return index(req=req, title=title, subtitle=subtitle, body=[output], adminarea=adminarea) def perform_modifyauthorizations(req, id_role="0", id_action="0", reverse=0, confirm=0, errortext='', sel='', authids=[]): """given ids of a role and an action, show all possible action combinations with checkboxes and allow user to access other functions. id_role - id of the role id_action - id of the action reverse - 0: ask for role first 1: ask for action first sel - which button and modification that is selected errortext - text to print when no connection exist between role and action authids - ids of checked checkboxes """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) name_role = acca.acc_get_role_name(id_role) name_action = acca.acc_get_action_name(id_action) output = '' try: id_role, id_action, reverse = int(id_role), int(id_action), int(reverse) except ValueError: pass extra = """
    Create new role
    go here to add a new role.
    """ if id_role or id_action: extra += '\n
    \n' if id_role and id_action: extra += """
    Add authorizations
    add an authorization to the existing ones.
    """ % (id_role, id_action, reverse) if id_role: extra += """
    Add authorizations
    add to role %s.
    """ % (id_role, name_role) if id_action: extra += """
    Add authorizations
    add to action %s.
    """ % (id_action, name_action) extra += '\n
    \n' if not reverse: # role -> action adminarea = 3 subtitle = 'step 1 - select the role' output += createroleselect(id_role=str(id_role), action="modifyauthorizations", step=1, roles=acca.acc_get_all_roles(), reverse=reverse) if id_role: rolacts = acca.acc_get_role_actions(id_role=id_role) subtitle = 'step 2 - select the action' output += createactionselect(id_action=str(id_action), action="modifyauthorizations", step=2, actions=rolacts, id_role=id_role, reverse=reverse) else: adminarea = 4 # action -> role subtitle = 'step 1 - select the action' output += createactionselect(id_action=str(id_action), action="modifyauthorizations", step=1, actions=acca.acc_get_all_actions(), reverse=reverse) if id_action: actroles = acca.acc_get_action_roles(id_action=id_action) subtitle = 'step 2 - select the role' output += createroleselect(id_role=str(id_role), action="modifyauthorizations", step=2, roles=actroles, id_action=id_action, reverse=reverse) if errortext: output += '

    %s

    ' % (errortext, ) if id_role and id_action: # adding to main area if type(authids) is not list: authids = [authids] subtitle = 'step 3 - select groups and modification' # get info res = acca.acc_find_possible_actions(id_role, id_action) # clean the authids hiddenids = [] if sel in ['delete selected']: hiddenids = authids[:] elif sel in ['split groups', 'merge groups']: for authid in authids: arghlp = res[int(authid)][0] if authid not in hiddenids and arghlp not in [-1, '-1', 0, '0']: hiddenids.append(authid) authids = hiddenids[:] if confirm: # do selected modification and output with new authorizations if sel == 'split groups': res = splitgroups(id_role, id_action, authids) elif sel == 'merge groups': res = mergegroups(id_role, id_action, authids) elif sel == 'delete selected': res = deleteselected(id_role, id_action, authids) authids = [] res = acca.acc_find_possible_actions(id_role, id_action) output += 'authorizations after %s.
    \n' % (sel, ) elif sel and authids: output += 'confirm choice of authorizations and modification.
    \n' else: output += 'select authorizations and perform modification.
    \n' if not res: errortext = 'all connections deleted, try different ' if reverse in ["0", 0]: return perform_modifyauthorizations(req=req, id_role=id_role, errortext=errortext + 'action.') else: return perform_modifyauthorizations(req=req, id_action=id_action, reverse=reverse, errortext=errortext + 'role.') # display output += modifyauthorizationsmenu(id_role, id_action, header=res[0], tuple=res[1:], checked=authids, reverse=reverse) if sel and authids: subtitle = 'step 4 - confirm to perform modification' # form with hidden authids output += '
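# Editorial sketch: the confirm branch above dispatches the 'sel' button
# value to one of three module helpers that share the signature
# (id_role, id_action, authids). Written as a dispatch table (assumes
# this module's own splitgroups/mergegroups/deleteselected):

def dispatch_modification_sketch(sel, id_role, id_action, authids):
    """Illustrative only: same mapping as the if/elif chain above."""
    operations = {'split groups': splitgroups,
                  'merge groups': mergegroups,
                  'delete selected': deleteselected}
    handler = operations.get(sel)
    return handler and handler(id_role, id_action, authids) or 0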
    \n' % ('modifyauthorizations', ) for hiddenid in hiddenids: output += '\n' % (hiddenid, ) # choose what to do if sel == 'split groups': output += '

    split groups containing:

    ' elif sel == 'merge groups': output += '

    merge groups containing:

    ' elif sel == 'delete selected': output += '

    delete selected entries:

    ' extracolumn = '\n' extracolumn += '\n' # show the entries here... output += tupletotable_onlyselected(header=res[0], tuple=res[1:], selected=hiddenids, extracolumn=extracolumn) output += '\n' \ % (id_role, ) output += '\n' \ % (id_action, ) output += '\n' \ % (sel, ) output += '\n' \ % (reverse, ) output += '
    ' # tried to perform modification without something selected elif sel and not authids and not confirm: output += '

    no valid groups selected

    ' # trying to put extra link on the right side try: body = [output, extra] except NameError: body = [output] # Display the page return index(req=req, title='Modify Authorizations', subtitle=subtitle, body=body, adminarea=adminarea) def modifyauthorizationsmenu(id_role, id_action, tuple=[], header=[], checked=[], reverse=0): """create table with header and checkboxes, used for multiple choice. makes use of tupletotable to add the actual table id_role - selected role, hidden value in the form id_action - selected action, hidden value in the form tuple - all rows to be put in the table (with checkboxes) header - column headers, empty strings added at start and end checked - ids of rows to be checked """ if not tuple: return 'no authorisations...' argnum = len(acca.acc_get_action_keywords(id_action=id_action)) tuple2 = [] for t in tuple: tuple2.append(t[:]) tuple2 = addcheckboxes(datalist=tuple2, name='authids', startindex=1, checked=checked) hidden = ' \n' \ % (id_role, ) hidden += ' \n' \ % (id_action, ) hidden += ' \n' \ % (reverse, ) button = '\n' if argnum > 1: button += '\n' button += '\n' hdrstr = '' for h in [''] + header + ['']: hdrstr += ' %s\n' % (h, ) if hdrstr: hdrstr = ' \n%s\n \n' % (hdrstr, ) output = '
    \n' output += ' \n' output += hdrstr output += '\n' % (hidden, ) align = ['admintdleft'] * len(tuple2[0]) try: align[1] = 'admintdright' except IndexError: pass output += '' for i in range(len(tuple2[0])): output += '\n' % (align[i], tuple2[0][i]) output += '\n' \ % (len(tuple2), button) output += '\n' for row in tuple2[1:]: output += ' \n' for i in range(len(row)): output += '\n' % (align[i], row[i]) output += ' \n' output += '
    %s
    %s\n%s\n
    %s
    \n
    \n'
    return output


def splitgroups(id_role=0, id_action=0, authids=[]):
    """get all the old ones, gather up the arglistids
    find a list of arglistidgroups to be split, unique
    get all actions in groups outside of the old ones, (old arglistid
    is allowed). show them like in showselect. """

    if not id_role or not id_action or not authids:
        return 0

    # find all the actions
    datalist = acca.acc_find_possible_actions(id_role, id_action)

    if type(authids) is str:
        authids = [authids]
    for i in range(len(authids)):
        authids[i] = int(authids[i])

    # argumentlistids of groups to be split
    splitgrps = []
    for authid in authids:
        hlp = datalist[authid][0]
        if hlp not in splitgrps and authid in range(1, len(datalist)):
            splitgrps.append(hlp)

    # split all groups, return failure if any single split failed
    result = 1
    for splitgroup in splitgrps:
        if not acca.acc_split_argument_group(id_role, id_action,
                                             splitgroup):
            result = 0
    return result


def mergegroups(id_role=0, id_action=0, authids=[]):
    """get all the old ones, gather up the argauthids
    find a list of arglistidgroups to be merged, unique
    get all actions in groups outside of the old ones, (old arglistid
    is allowed). show them like in showselect."""

    if not id_role or not id_action or not authids:
        return 0

    datalist = acca.acc_find_possible_actions(id_role, id_action)

    if type(authids) is str:
        authids = [authids]
    for i in range(len(authids)):
        authids[i] = int(authids[i])

    # argumentlistids of groups to be merged
    mergegrps = []
    for authid in authids:
        hlp = datalist[authid][0]
        if hlp not in mergegrps and authid in range(1, len(datalist)):
            mergegrps.append(hlp)

    # merge groups and return success or failure
    if acca.acc_merge_argument_groups(id_role, id_action, mergegrps):
        return 1
    else:
        return 0


def deleteselected(id_role=0, id_action=0, authids=[]):
    """delete checked authorizations/possible actions, ids in authids.

    id_role - role to delete from

    id_action - action to delete from

    authids - listids for which possible actions to delete."""

    if not id_role or not id_action or not authids:
        return 0

    if type(authids) in [str, int]:
        authids = [authids]
    for i in range(len(authids)):
        authids[i] = int(authids[i])

    result = acca.acc_delete_possible_actions(id_role=id_role,
                                              id_action=id_action,
                                              authids=authids)
    return result


def headeritalic(**ids):
    """transform keyword=value pairs to string with value in italics.

    **ids - a dictionary of pairs to create string from """

    output = ''
    value = ''
    table = ''
    for key in ids.keys():
        if key in ['User', 'user']:
            value, table = 'email', 'user'
        elif key in ['Role', 'role']:
            value, table = 'name', 'accROLE'
        elif key in ['Action', 'action']:
            value, table = 'name', 'accACTION'
        else:
            if output:
                output += ' and '
            output += ' %s %s' % (key, ids[key])
            continue

        res = run_sql("""SELECT %%s FROM %s WHERE id = %%s"""
                      % wash_table_column_name(table),
                      (value, ids[key])) # kwalitee: disable=sql
        if res:
            if output:
                output += ' and '
            output += ' %s %s' % (key, res[0][0])

    return output


def headerstrong(query=1, **ids):
    """transform keyword=value pairs to string with value in strong text.

    **ids - a dictionary of pairs to create string from

    query - 1 -> try to find names to ids of role, user and action.
            0 -> do not try to find names, use the value passed on """

    output = ''
    value = ''
    table = ''
    for key in ids.keys():
        if key in ['User', 'user']:
            value, table = 'email', 'user'
        elif key in ['Role', 'role']:
            value, table = 'name', 'accROLE'
        elif key in ['Action', 'action']:
            value, table = 'name', 'accACTION'
        else:
            if output:
                output += ' and '
            output += ' %s %s' % (key, ids[key])
            continue

        if query:
            res = run_sql("""SELECT %%s FROM %s WHERE id = %%s"""
                          % wash_table_column_name(table),
                          (value, ids[key])) # kwalitee: disable=sql
            if res:
                if output:
                    output += ' and '
                output += ' %s %s' % (key, res[0][0])
        else:
            if output:
                output += ' and '
            output += ' %s %s' % (key, ids[key])

    return output


def startpage():
    """create the menu for the startpage"""

    body = """
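# Editorial sketch: headeritalic/headerstrong must put a *table name*
# into the SQL text (placeholders cannot bind identifiers), so they
# funnel it through wash_table_column_name() first. The general shape,
# with an explicit whitelist standing in for the Invenio helper:

def lookup_label_sketch(cursor, table, row_id):
    """Illustrative only: whitelist the identifier, bind the value."""
    allowed = {'user': 'email', 'accROLE': 'name', 'accACTION': 'name'}
    if table not in allowed:
        raise ValueError('unexpected table name: %r' % (table, ))
    cursor.execute("SELECT %s FROM %s WHERE id = %%s"
                   % (allowed[table], table), (row_id, ))
    return cursor.fetchone()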
    selection for WebAccess Admin
    Role Area
    main area to configure administration rights and authorization rules.
    Action Area
    configure administration rights with the actions as starting point.
    User Area
    configure administration rights with the users as starting point.
    Reset Area
    reset roles, actions and authorizations.
    Manage accounts Area
    manage user accounts.
    Delegate Rights - With Restrictions
    delegate your rights for some roles.
    Manage Robot Login
    manage robot login keys and test URLs.
    """ return body def rankarea(): return "Rankmethod area" def perform_simpleauthorization(req, id_role=0, id_action=0): """show a page with simple overview of authorizations between a connected role and action. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) res = acca.acc_find_possible_actions(id_role, id_action) if res: extra = createhiddenform(action='modifyauthorizations', button='modify authorizations', id_role=id_role, id_action=id_action) output = '

    authorizations for %s:

    ' \ % (headerstrong(action=id_action, role=id_role), ) output += tupletotable(header=res[0], tuple=res[1:], extracolumn=extra) else: output = 'no details to show' return index(req=req, title='Simple authorization details', subtitle='simple authorization details', body=[output], adminarea=3) def perform_showroleusers(req, id_role=0): """show a page with simple overview of a role and connected users. """ (auth_code, auth_message) = is_adminuser(req) if auth_code != 0: return mustloginpage(req, auth_message) res = acca.acc_get_role_users(id_role=id_role) name_role = acca.acc_get_role_name(id_role=id_role) if res: users = [] for (role_id, name, dummy) in res: users.append([role_id, name, 'show user details' % (role_id, )]) output = '

    users connected to %s:

    ' \ % (headerstrong(role=id_role), ) output += tupletotable(header=['id', 'name', ''], tuple=users) else: output = 'no users connected to role %s' \ % (name_role, ) extra = """
    Connect user
    connect users to the role.
    """ % (id_role, ) return index(req=req, title='Users connected to role %s' % (name_role, ), subtitle='simple details', body=[output, extra], adminarea=3) def createselect(id_input="0", label="", step=0, name="", action="", list=[], extralist=[], extrastamp='', button="", **hidden): """create form with select and hidden values id - the one to choose as selected if exists label - label shown to the left of the select name - the name of the select on which to reference it list - primary list to select from extralist - list of options to be put in paranthesis extrastamp - stamp extralist entries with this if not '' usually paranthesis around the entry button - the value/text to be put on the button **hidden - name=value pairs to be put as hidden in the form. """ step = step and '%s. ' % step or '' output = '
    output += ' %s\n' % (step + label, )
    output += ' \n'
    for key in hidden.keys():
        output += ' <input type="hidden" name="%s" value="%s" />\n' \
                  % (key, hidden[key])
    output += ' <input type="submit" value="%s" />\n' \
              % (button, )
    output += '</form>\n'
    return output


def createactionselect(id_action="0", label="select action", step=0,
                       name="id_action", action="", actions=[],
                       extraactions=[], extrastamp='',
                       button="select action", **hidden):
    """create a select for actions in a form. see createselect."""

    return createselect(id_input=id_action, label=label, step=step,
                        name=name, action=action, list=actions,
                        extralist=extraactions, extrastamp=extrastamp,
                        button=button, **hidden)


def createroleselect(id_role="0", label="select role", step=0,
                     name="id_role", action="", roles=[], extraroles=[],
                     extrastamp='', button="select role", **hidden):
    """create a select for roles in a form. see createselect."""

    return createselect(id_input=id_role, label=label, step=step,
                        name=name, action=action, list=roles,
                        extralist=extraroles, extrastamp=extrastamp,
                        button=button, **hidden)


def createuserselect(id_user="0", label="select user", step=0,
                     name="id_user", action="", users=[], extrausers=[],
                     extrastamp='(connected)', button="select user",
                     **hidden):
    """create a select for users in a form. see createselect."""

    return createselect(id_input=id_user, label=label, step=step,
                        name=name, action=action, list=users,
                        extralist=extrausers, extrastamp=extrastamp,
                        button=button, **hidden)


def cleanstring(txt='', comma=0):
    """clean all the strings before submitting to access control admin.

    remove characters not letter, number or underscore, also remove
    leading underscores and numbers. return cleaned string.

    txt - string to be cleaned

    comma - 1 -> allow the comma to divide multiple arguments
            0 -> wash commas as well """

    # remove not allowed characters
    txt = re.sub(r'[^a-zA-Z0-9_,]', '', txt)

    # split string on commas
    items = txt.split(',')
    txt = ''
    for item in items:
        if not item:
            continue
        if comma and txt:
            txt += ','
        # create valid variable names
        txt += re.sub(r'^([0-9_])*', '', item)

    return txt


def cleanstring_argumentvalue(txt=''):
    """clean the value of an argument before submitting it.

    allowed characters: a-z A-Z 0-9 _ * . and space

    txt - string to be cleaned """

    # remove not allowed characters
    txt = re.sub(r'[^a-zA-Z0-9_ *.]', '', txt)
    # trim leading and ending spaces
    txt = re.sub(r'^ *| *$', '', txt)

    return txt


def cleanstring_email(txt=''):
    """clean the string and return a valid email address.

    txt - string to be cleaned """

    # remove not allowed characters
    txt = re.sub(r'[^a-zA-Z0-9_.@-]', '', txt)

    return txt


def check_email(txt=''):
    """control that submitted emails are correct.

    this little check is not very good, but better than nothing.
""" r = re.compile(r'(.)+\@(.)+\.(.)+') return r.match(txt) and 1 or 0 def send_account_activated_message(account_email, send_to, password, ln=CFG_SITE_LANG): """Send an email to the address given by send_to about the new activated account.""" _ = gettext_set_language(ln) sub = _("Your account on '%s' has been activated") % CFG_SITE_NAME body = _("Your account earlier created on '%s' has been activated:") \ % CFG_SITE_NAME + '\n\n' body += ' ' + _("Username/Email:") + " %s\n" % account_email body += ' ' + _("Password:") + " %s\n" % ("*" * len(str(password))) body += "\n---------------------------------" body += "\n%s" % CFG_SITE_NAME return send_email(CFG_SITE_SUPPORT_EMAIL, send_to, sub, body, header='') def send_new_user_account_warning(new_account_email, send_to, password, ln=CFG_SITE_LANG): """Send an email to the address given by send_to about the new account new_account_email.""" _ = gettext_set_language(ln) sub = _("Account created on '%s'") % CFG_SITE_NAME body = _("An account has been created for you on '%s':") % CFG_SITE_NAME + '\n\n' body += ' ' + _("Username/Email:") + " %s\n" % new_account_email body += ' ' + _("Password:") + " %s\n" % ("*" * len(str(password))) body += "\n---------------------------------" body += "\n%s" % CFG_SITE_NAME return send_email(CFG_SITE_SUPPORT_EMAIL, send_to, sub, body, header='') def send_account_rejected_message(new_account_email, send_to, ln=CFG_SITE_LANG): """Send an email to the address given by send_to about the new account new_account_email.""" _ = gettext_set_language(ln) sub = _("Account rejected on '%s'") % CFG_SITE_NAME body = _("Your request for an account has been rejected on '%s':") \ % CFG_SITE_NAME + '\n\n' body += ' ' + _("Username/Email: %s") % new_account_email + "\n" body += "\n---------------------------------" body += "\n%s" % CFG_SITE_NAME return send_email(CFG_SITE_SUPPORT_EMAIL, send_to, sub, body, header='') def send_account_deleted_message(new_account_email, send_to, ln=CFG_SITE_LANG): """Send an email to the address given by send_to about the new account new_account_email.""" _ = gettext_set_language(ln) sub = _("Account deleted on '%s'") % CFG_SITE_NAME body = _("Your account on '%s' has been deleted:") % CFG_SITE_NAME + '\n\n' body += ' ' + _("Username/Email:") + " %s\n" % new_account_email body += "\n---------------------------------" body += "\n%s" % CFG_SITE_NAME return send_email(CFG_SITE_SUPPORT_EMAIL, send_to, sub, body, header='') def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: print >> sys.stderr, "Error: %s." % msg print >> sys.stderr print >> sys.stderr, """Usage: %s [options] General options: -h, --help\t\tprint this help -V, --version\t\tprint version number Authentication options: -u, --user=USER\tUser name needed to perform the administrative task Option to administrate authorizations: -a, --add\t\tadd default authorization settings -c, --compile\t\tcompile firewall like role definitions (FireRole) -r, --reset\t\treset to default settings -D, --demo\t\tto be used with -a or -r in order to consider demo site authorizationss """ % sys.argv[0] sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. 
""" ## parse command line: # set user-defined options: options = {'user' : '', 'reset' : 0, 'compile' : 0, 'add' : 0, 'demo' : 0} try: opts, args = getopt.getopt(sys.argv[1:], "hVu:racD", ["help", "version", "user=", "reset", "add", "compile", "demo"]) except getopt.GetoptError, err: usage(1, err) try: for opt in opts: if opt[0] in ("-h", "--help"): usage(0) elif opt[0] in ("-V", "--version"): print __revision__ sys.exit(0) elif opt[0] in ("-u", "--user"): options["user"] = opt[1] elif opt[0] in ("-r", "--reset"): options["reset"] = 1 elif opt[0] in ("-a", "--add"): options["add"] = 1 elif opt[0] in ("-c", "--compile"): options["compile"] = 1 elif opt[0] in ("-D", "--demo"): options["demo"] = 1 else: usage(1) if options['add'] or options['reset'] or options['compile']: if acca.acc_get_action_id('cfgwebaccess'): # Action exists hence authentication works :-) options['user'] = authenticate(options['user'], authorization_msg="WebAccess Administration", authorization_action="cfgwebaccess") if options['reset'] and options['demo']: acca.acc_reset_default_settings([CFG_SITE_ADMIN_EMAIL], DEF_DEMO_USER_ROLES, DEF_DEMO_ROLES, DEF_DEMO_AUTHS) print "Reset default demo site settings." elif options['reset']: acca.acc_reset_default_settings([CFG_SITE_ADMIN_EMAIL]) print "Reset default settings." elif options['add'] and options['demo']: acca.acc_add_default_settings([CFG_SITE_ADMIN_EMAIL], DEF_DEMO_USER_ROLES, DEF_DEMO_ROLES, DEF_DEMO_AUTHS) print "Added default demo site settings." elif options['add']: acca.acc_add_default_settings([CFG_SITE_ADMIN_EMAIL]) print "Added default settings." if options['compile']: repair_role_definitions() print "Compiled firewall like role definitions." else: usage(1, "You must specify at least one command") except StandardError, e: register_exception() usage(e) return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/webaccess/web/admin/webaccessadmin.py b/modules/webaccess/web/admin/webaccessadmin.py index d6703fbd7..f2f206cec 100644 --- a/modules/webaccess/web/admin/webaccessadmin.py +++ b/modules/webaccess/web/admin/webaccessadmin.py @@ -1,348 +1,353 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
"""Invenio WebAccess Administrator Interface.""" __revision__ = "$Id$" from invenio.config import CFG_SITE_LANG import invenio.webaccessadmin_lib as wal from invenio.access_control_config import CFG_ACC_EMPTY_ROLE_DEFINITION_SRC # reload(wal) # from invenio.webaccessadmin_lib import index index = wal.index def rolearea(req, grep='', ln=CFG_SITE_LANG): """create the role area menu page.""" return wal.perform_rolearea(req=req, grep=grep) def actionarea(req, grep='', ln=CFG_SITE_LANG): """create the role area menu page.""" return wal.perform_actionarea(req=req, grep=grep) def managerobotlogin(req, robot_name='', new_pwd1='', new_pwd2='', login_method='', timeout='', referer='', ip='', action='', confirm=0, email='', groups='', nickname='', json_assertion='', url_only=0, ln=CFG_SITE_LANG): """Manage robot login keys and test URL.""" return wal.perform_managerobotlogin(req=req, robot_name=robot_name, new_pwd1=new_pwd1, new_pwd2=new_pwd2, login_method=login_method, timeout=timeout, referer=referer, ip=ip, action=action, confirm=confirm, email=email, groups=groups, nickname=nickname, json_assertion=json_assertion, url_only=url_only) def userarea(req, email_user_pattern='', ln=CFG_SITE_LANG): """create the user area menu page. """ return wal.perform_userarea(req=req, email_user_pattern=email_user_pattern) def listgroups(req, ln=CFG_SITE_LANG): return wal.perform_listgroups(req=req) def resetarea(req, ln=CFG_SITE_LANG): """create the role area menu page.""" return wal.perform_resetarea(req=req) def resetdefaultsettings(req, superusers=[], confirm=0, ln=CFG_SITE_LANG): """create the reset default settings page. """ return wal.perform_resetdefaultsettings(req=req, superusers=superusers, confirm=confirm) def adddefaultsettings(req, superusers=[], confirm=0, ln=CFG_SITE_LANG): """create the add default settings page. 
""" return wal.perform_adddefaultsettings(req=req, superusers=superusers, confirm=confirm) def manageaccounts(req, mtype='', content='', confirm=0, ln=CFG_SITE_LANG): """enable, disable and edit accounts""" return wal.perform_manageaccounts(req=req, mtype=mtype, content=content, confirm=confirm) def modifyaccountstatus(req, userID, email_user_pattern='', limit_to=-1, maxpage=25, page=1, callback='yes', confirm=0, ln=CFG_SITE_LANG): """enable or disable account""" return wal.perform_modifyaccountstatus(req=req, userID=userID, email_user_pattern=email_user_pattern, limit_to=limit_to, maxpage=maxpage, page=page, callback=callback, confirm=confirm) def modifypreferences(req, userID, login_method='', callback='yes', confirm=0, ln=CFG_SITE_LANG): """modify the preferences of an account""" return wal.perform_modifypreferences(req=req, userID=userID, login_method=login_method, callback=callback, confirm=confirm) +def modifyapikeydata(req, userID, keyID, status, callback='yes', confirm=0, ln=CFG_SITE_LANG): + """modify the status of a REST API key""" + + return wal.perform_modifyapikeydata(req=req, userID=userID, keyID=keyID, status=status, callback=callback, confirm=confirm) + def modifylogindata(req, userID, nickname='', email='', password='', callback='yes', confirm=0, ln=CFG_SITE_LANG): """modify the email/password of an account""" return wal.perform_modifylogindata(req=req, userID=userID, nickname=nickname, email=email, password=password, callback=callback, confirm=confirm) def rejectaccount(req, userID, email_user_pattern='', limit_to=-1, maxpage=25, page=1, callback='yes', confirm=0, ln=CFG_SITE_LANG): """Set account inactive, delete it and send email to the owner.""" return wal.perform_rejectaccount(req=req, userID=userID, email_user_pattern=email_user_pattern, limit_to=limit_to, maxpage=maxpage, page=page, callback=callback, confirm=confirm) def deleteaccount(req, userID, callback='yes', confirm=0, ln=CFG_SITE_LANG): """delete account""" return wal.perform_deleteaccount(req=req, userID=userID, callback=callback, confirm=confirm) def createaccount(req, email='', password='', callback='yes', confirm=0, ln=CFG_SITE_LANG): """create account""" return wal.perform_createaccount(req=req, email=email, password=password, callback=callback, confirm=confirm) def editaccount(req, userID, mtype='', content='', callback='yes', confirm=0, ln=CFG_SITE_LANG): """edit account. """ return wal.perform_editaccount(req=req, userID=userID, mtype=mtype, content=content, callback=callback, confirm=confirm) def becomeuser(req, userID='', callback='yes', confirm=0, ln=CFG_SITE_LANG): """edit account. """ return wal.perform_becomeuser(req=req, userID=userID, callback=callback, confirm=confirm) def modifyaccounts(req, email_user_pattern='', limit_to=-1, maxpage=25, page=1, callback='yes', confirm=0, ln=CFG_SITE_LANG): """Modify accounts. 
""" return wal.perform_modifyaccounts(req=req, email_user_pattern=email_user_pattern, limit_to=limit_to, maxpage=maxpage, page=page, callback=callback,confirm=confirm) def delegate_startarea(req, ln=CFG_SITE_LANG): """add info here""" return wal.perform_delegate_startarea(req=req) def delegate_adminsetup(req, id_role_admin=0, id_role_delegate=0, confirm=0, ln=CFG_SITE_LANG): """add info here""" return wal.perform_delegate_adminsetup(req=req, id_role_admin=id_role_admin, id_role_delegate=id_role_delegate, confirm=confirm) def delegate_adduserrole(req, id_role=0, email_user_pattern='', id_user=0, confirm=0, ln=CFG_SITE_LANG): """add info here""" return wal.perform_delegate_adduserrole(req=req, id_role=id_role, email_user_pattern=email_user_pattern, id_user=id_user, confirm=confirm) def delegate_deleteuserrole(req, id_role=0, id_user=0, confirm=0, ln=CFG_SITE_LANG): """add info here""" return wal.perform_delegate_deleteuserrole(req=req, id_role=id_role, id_user=id_user, confirm=confirm) def addrole(req, name_role='', description='put description here.', firerole_def_src=CFG_ACC_EMPTY_ROLE_DEFINITION_SRC, confirm=0, ln=CFG_SITE_LANG): """form to add a new role with these values: name_role - name of the new role description - optional description of the role firerole_def_src - optional firerole like definition """ return wal.perform_addrole(req=req, name_role=name_role, description=description, firerole_def_src=firerole_def_src, confirm=confirm) def modifyrole(req, id_role='0', name_role='', description='put description here.', firerole_def_src='', modified='0', confirm=0, ln=CFG_SITE_LANG): """form to add a new role with these values: name_role - name of the new role description - optional description of the role firerole_def_src - optional firerole like definition """ return wal.perform_modifyrole(req=req, id_role=id_role, name_role=name_role, description=description, firerole_def_src=firerole_def_src, modified=modified, confirm=confirm) def deleterole(req, id_role="0", confirm=0, ln=CFG_SITE_LANG): """select a role and show all connected information, users - users that can access the role. actions - actions with possible authorizations.""" return wal.perform_deleterole(req=req, id_role=id_role, confirm=confirm) def showroledetails(req, id_role='0', ln=CFG_SITE_LANG): """show the details of a role.""" return wal.perform_showroledetails(req=req, id_role=id_role) def showactiondetails(req, id_action="0", ln=CFG_SITE_LANG): """show the details of an action. """ return wal.perform_showactiondetails(req=req, id_action=id_action) def showuserdetails(req, id_user="0", ln=CFG_SITE_LANG): """show the details of an action. """ return wal.perform_showuserdetails(req=req, id_user=id_user) def adduserrole(req, id_role='0', email_user_pattern='', id_user='0', confirm=0, ln=CFG_SITE_LANG): """create connection between user and role. id_role - id of the role to add user to email_user_pattern - search for users using this pattern id_user - id of user to add to the role. """ return wal.perform_adduserrole(req=req, id_role=id_role, email_user_pattern=email_user_pattern, id_user=id_user, confirm=confirm) def addroleuser(req, email_user_pattern='', id_user='0', id_role='0', confirm=0, ln=CFG_SITE_LANG): """create connection between user and role. email_user_pattern - search for users using this pattern id_user - id of user to add to the role. id_role - id of the role to add user to. 
""" return wal.perform_addroleuser(req=req, email_user_pattern=email_user_pattern, id_user=id_user, id_role=id_role, confirm=confirm) def deleteuserrole(req, id_role='0', id_user='0', reverse=0, confirm=0, ln=CFG_SITE_LANG): """delete connection between role and user. id_role - id of role to disconnect id_user - id of user to disconnect. """ return wal.perform_deleteuserrole(req=req, id_role=id_role, id_user=id_user, reverse=reverse, confirm=confirm) def addauthorization(req, id_role="0", id_action="0", reverse="0", confirm=0, **keywords): """ form to add new connection between user and role: id_role - role to connect id_action - action to connect reverse - role or action first? """ return wal.perform_addauthorization(req=req, id_role=id_role, id_action=id_action, reverse=reverse, confirm=confirm, **keywords) def deleteroleaction(req, id_role="0", id_action="0", reverse=0, confirm=0, ln=CFG_SITE_LANG): """delete all connections between a role and an action. id_role - id of the role id_action - id of the action reverse - 0: ask for role first 1: ask for action first""" return wal.perform_deleteroleaction(req=req, id_role=id_role, id_action=id_action, reverse=reverse, confirm=confirm) def modifyauthorizations(req, id_role="0", id_action="0", reverse=0, confirm=0, sel='', errortext='', authids=[], ln=CFG_SITE_LANG): """given ids of a role and an action, show all possible action combinations with checkboxes and allow user to access other functions. id_role - id of the role id_action - id of the action reverse - 0: ask for role first 1: ask for action first sel - which button and modification that is selected errortext - text to print when no connection exist between role and action authids - ids of checked checkboxes """ return wal.perform_modifyauthorizations(req=req, id_role=id_role, id_action=id_action, reverse=reverse, confirm=confirm, sel=sel, authids=authids) def simpleauthorization(req, id_role=0, id_action=0, ln=CFG_SITE_LANG): """show a page with simple overview of authorizations between a connected role and action. """ return wal.perform_simpleauthorization(req=req, id_role=id_role, id_action=id_action) def showroleusers(req, id_role=0, ln=CFG_SITE_LANG): """show a page with simple overview of a role and connected users. """ return wal.perform_showroleusers(req=req, id_role=id_role) diff --git a/modules/webjournal/lib/webjournal_utils.py b/modules/webjournal/lib/webjournal_utils.py index 5fbd2840f..6b16455d3 100644 --- a/modules/webjournal/lib/webjournal_utils.py +++ b/modules/webjournal/lib/webjournal_utils.py @@ -1,1771 +1,1774 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Various utilities for WebJournal, e.g. config parser, etc. 
""" import time import datetime import calendar import re import os import cPickle import math import urllib from MySQLdb import OperationalError from xml.dom import minidom from urlparse import urlparse from invenio.config import \ CFG_ETCDIR, \ CFG_SITE_URL, \ CFG_CACHEDIR, \ CFG_SITE_LANG, \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_DEVEL_SITE from invenio.dbquery import run_sql from invenio.bibformat_engine import BibFormatObject from invenio.search_engine import search_pattern, record_exists from invenio.messages import gettext_set_language from invenio.errorlib import register_exception +from invenio.urlutils import make_invenio_opener + +WEBJOURNAL_OPENER = make_invenio_opener('WebJournal') ########################### REGULAR EXPRESSIONS ###################### header_pattern = re.compile('\s*(?P
    .*?)\s*

    ') header_pattern2 = re.compile('(?P
    .*?)

    ') para_pattern = re.compile('(?P.+?)

    ', re.DOTALL) img_pattern = re.compile('\S+?)("|\'|\s).*?/>', re.DOTALL) image_pattern = re.compile(r''' (\S*)["']?>)?# get the link location for the image \s*# after each tag we can have arbitrary whitespaces
    # the image is always centered \s* \S*)\s*border=1\s*(/)?># getting the image itself \s*
    \s* ()? (
    |
    |
    )*# the caption can be separated by any nr of line breaks ( \s* \s*
    (?P.*?)
    # getting the caption \s*
    \s*
    )?''', re.DOTALL | re.VERBOSE | re.IGNORECASE ) #' ############################## FEATURED RECORDS ###################### def get_featured_records(journal_name): """ Returns the 'featured' records i.e. records chosen to be displayed with an image on the main page, in the widgets section, for the given journal. parameter: journal_name - (str) the name of the journal for which we want to get the featured records returns: list of tuples (recid, img_url) """ try: feature_file = open('%s/webjournal/%s/featured_record' % \ (CFG_ETCDIR, journal_name)) except: return [] records = feature_file.readlines() return [(record.split('---', 1)[0], record.split('---', 1)[1]) \ for record in records if "---" in record] def add_featured_record(journal_name, recid, img_url): """ Adds the given record to the list of featured records of the given journal. parameters: journal_name - (str) the name of the journal to which the record should be added. recid - (int) the record id of the record to be featured. img_url - (str) a url to an image icon displayed along the featured record. returns: 0 if everything went ok 1 if record is already in the list 2 if other problems """ # Check that record is not already there featured_records = get_featured_records(journal_name) for featured_recid, featured_img in featured_records: if featured_recid == str(recid): return 1 try: fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR, journal_name), "a") fptr.write(str(recid) + '---' + img_url + '\n') fptr.close() except: return 2 return 0 def remove_featured_record(journal_name, recid): """ Removes the given record from the list of featured records of the given journal. parameters: journal_name - (str) the name of the journal to which the record should be added. recid - (int) the record id of the record to be featured. """ featured_records = get_featured_records(journal_name) try: fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR, journal_name), "w") for featured_recid, featured_img in featured_records: if str(featured_recid) != str(recid): fptr.write(str(featured_recid) + '---' + featured_img + \ '\n') fptr.close() except: return 1 return 0 ############################ ARTICLES RELATED ######################## def get_order_dict_from_recid_list(recids, journal_name, issue_number, newest_first=False, newest_only=False): """ Returns the ordered list of input recids, for given 'issue_number'. Since there might be several articles at the same position, the returned structure is a dictionary with keys being order number indicated in record metadata, and values being list of recids for this order number (recids for one position are ordered from highest to lowest recid). Eg: {'1': [2390, 2386, 2385], '3': [2388], '2': [2389], '4': [2387]} Parameters: recids - a list of all recid's that should be brought into order journal_name - the name of the journal issue_number - *str* the issue_number for which we are deriving the order newest_first - *bool* if True, new articles should be placed at beginning of the list. 
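# Editorial sketch: get_featured_records() persists one record per line
# as 'recid---img_url' in a plain text file. The parsing step alone
# (newline handling added here; the original keeps the trailing newline):

def parse_featured_records_sketch(lines):
    """Illustrative only: same '---' split as get_featured_records."""
    return [tuple(line.rstrip('\n').split('---', 1))
            for line in lines if '---' in line]

# parse_featured_records_sketch(['97---http://cds.example.org/icon.png\n'])
# -> [('97', 'http://cds.example.org/icon.png')]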
If so, their position/order will be negative integers newest_only - *bool* if only new articles should be returned Returns: ordered_records: a dictionary with the recids ordered by keys """ ordered_records = {} ordered_new_records = {} records_without_defined_order = [] new_records_without_defined_order = [] for record in recids: temp_rec = BibFormatObject(record) articles_info = temp_rec.fields('773__') for article_info in articles_info: if article_info.get('n', '') == issue_number or \ '0' + article_info.get('n', '') == issue_number: if article_info.has_key('c') and \ article_info['c'].isdigit(): order_number = int(article_info.get('c', '')) if (newest_first or newest_only) and \ is_new_article(journal_name, issue_number, record): if ordered_new_records.has_key(order_number): ordered_new_records[order_number].append(record) else: ordered_new_records[order_number] = [record] elif not newest_only: if ordered_records.has_key(order_number): ordered_records[order_number].append(record) else: ordered_records[order_number] = [record] else: # No order? No problem! Append it at the end. if newest_first and is_new_article(journal_name, issue_number, record): new_records_without_defined_order.append(record) elif not newest_only: records_without_defined_order.append(record) # Append records without order at the end of the list if records_without_defined_order: if ordered_records: ordered_records[max(ordered_records.keys()) + 1] = records_without_defined_order else: ordered_records[1] = records_without_defined_order # Append new records without order at the end of the list of new # records if new_records_without_defined_order: if ordered_new_records: ordered_new_records[max(ordered_new_records.keys()) + 1] = new_records_without_defined_order else: ordered_new_records[1] = new_records_without_defined_order # Append new records at the beginning of the list of 'old' # records. To do so, use negative integers if ordered_new_records: highest_new_record_order = max(ordered_new_records.keys()) for order, new_records in ordered_new_records.iteritems(): ordered_records[- highest_new_record_order + order - 1] = new_records for (order, records) in ordered_records.iteritems(): # Reverse so that if there are several articles at same # positon, newest appear first records.reverse() return ordered_records def get_journal_articles(journal_name, issue, category, newest_first=False, newest_only=False): """ Returns the recids in given category and journal, for given issue number. The returned recids are grouped according to their 773__c field. Example of returned value: {'1': [2390, 2386, 2385], '3': [2388], '2': [2389], '4': [2387]} Parameters: journal_name - *str* the name of the journal (as used in URLs) issue - *str* the issue. Eg: "08/2007" category - *str* the name of the category newest_first - *bool* if True, new articles should be placed at beginning of the list. 
If so, their position/order will be negative integers newest_only - *bool* if only new articles should be returned """ use_cache = True current_issue = get_current_issue(CFG_SITE_LANG, journal_name) if issue_is_later_than(issue, current_issue): # If we are working on unreleased issue, do not use caching # mechanism use_cache = False if use_cache: cached_articles = _get_cached_journal_articles(journal_name, issue, category) if cached_articles is not None: ordered_articles = get_order_dict_from_recid_list(cached_articles, journal_name, issue, newest_first, newest_only) return ordered_articles # Retrieve the list of rules that map Category -> Search Pattern. # Keep only the rule matching our category config_strings = get_xml_from_config(["record/rule"], journal_name) category_to_search_pattern_rules = config_strings["record/rule"] try: matching_rule = [rule.split(',', 1) for rule in \ category_to_search_pattern_rules \ if rule.split(',')[0] == category] except: return [] recids_issue = search_pattern(p='773__n:%s' % issue) recids_rule = search_pattern(p=matching_rule[0][1]) if issue[0] == '0': # search for 09/ and 9/ recids_issue.union_update(search_pattern(p='773__n:%s' % issue.lstrip('0'))) recids_rule.intersection_update(recids_issue) recids = [recid for recid in recids_rule if record_exists(recid) == 1] if use_cache: _cache_journal_articles(journal_name, issue, category, recids) ordered_articles = get_order_dict_from_recid_list(recids, journal_name, issue, newest_first, newest_only) return ordered_articles def _cache_journal_articles(journal_name, issue, category, articles): """ Caches given articles IDs. """ journal_cache_path = get_journal_article_cache_path(journal_name, issue) try: journal_cache_file = open(journal_cache_path, 'r') journal_info = cPickle.load(journal_cache_file) journal_cache_file.close() except cPickle.PickleError, e: journal_info = {} except IOError: journal_info = {} except EOFError: journal_info = {} except ValueError: journal_info = {} if not journal_info.has_key('journal_articles'): journal_info['journal_articles'] = {} journal_info['journal_articles'][category] = articles # Create cache directory if it does not exist journal_cache_dir = os.path.dirname(journal_cache_path) if not os.path.exists(journal_cache_dir): try: os.makedirs(journal_cache_dir) except: return False journal_cache_file = open(journal_cache_path, 'w') cPickle.dump(journal_info, journal_cache_file) journal_cache_file.close() return True def _get_cached_journal_articles(journal_name, issue, category): """ Retrieve the articles IDs cached for this journal. Returns None if cache does not exist or more than 5 minutes old """ # Check if our cache is more or less up-to-date (not more than 5 # minutes old) try: journal_cache_path = get_journal_article_cache_path(journal_name, issue) last_update = os.path.getctime(journal_cache_path) except Exception, e : return None now = time.time() if (last_update + 5*60) < now: return None # Get from cache try: journal_cache_file = open(journal_cache_path, 'r') journal_info = cPickle.load(journal_cache_file) journal_articles = journal_info.get('journal_articles', {}).get(category, None) journal_cache_file.close() except cPickle.PickleError, e: journal_articles = None except IOError: journal_articles = None except EOFError: journal_articles = None except ValueError: journal_articles = None return journal_articles def is_new_article(journal_name, issue, recid): """ Check if given article should be considered as new or not. 
New articles are articles that have never appeared in older issues than given one. """ article_found_in_older_issue = False temp_rec = BibFormatObject(recid) publication_blocks = temp_rec.fields('773__') for publication_block in publication_blocks: this_issue_number, this_issue_year = issue.split('/') issue_number, issue_year = publication_block.get('n', '/').split('/', 1) if int(issue_year) < int(this_issue_year): # Found an older issue article_found_in_older_issue = True break elif int(issue_year) == int(this_issue_year) and \ int(issue_number) < int(this_issue_number): # Found an older issue article_found_in_older_issue = True break return not article_found_in_older_issue ############################ CATEGORIES RELATED ###################### def get_journal_categories(journal_name, issue=None): """ List the categories for the given journal and issue. Returns categories in same order as in config file. Parameters: journal_name - *str* the name of the journal (as used in URLs) issue - *str* the issue. Eg:'08/2007'. If None, consider all categories defined in journal config """ categories = [] current_issue = get_current_issue(CFG_SITE_LANG, journal_name) config_strings = get_xml_from_config(["record/rule"], journal_name) all_categories = [rule.split(',')[0] for rule in \ config_strings["record/rule"]] if issue is None: return all_categories for category in all_categories: recids = get_journal_articles(journal_name, issue, category) if len(recids.keys()) > 0: categories.append(category) return categories def get_category_query(journal_name, category): """ Returns the category definition for the given category and journal name Parameters: journal_name - *str* the name of the journal (as used in URLs) categoy - *str* a category name, as found in the XML config """ config_strings = get_xml_from_config(["record/rule"], journal_name) category_to_search_pattern_rules = config_strings["record/rule"] try: matching_rule = [rule.split(',', 1)[1].strip() for rule in \ category_to_search_pattern_rules \ if rule.split(',')[0] == category] except: return None return matching_rule[0] ######################### JOURNAL CONFIG VARS ###################### cached_parsed_xml_config = {} def get_xml_from_config(nodes, journal_name): """ Returns values from the journal configuration file. The needed values can be specified by node name, or by a hierarchy of nodes names using '/' as character to mean 'descendant of'. Eg. 'record/rule' to get all the values of 'rule' tags inside the 'record' node Returns a dictionary with a key for each query and a list of strings (innerXml) results for each key. Has a special field "config_fetching_error" that returns an error when something has gone wrong. """ # Get and open the config file results = {} if cached_parsed_xml_config.has_key(journal_name): config_file = cached_parsed_xml_config[journal_name] else: config_path = '%s/webjournal/%s/%s-config.xml' % \ (CFG_ETCDIR, journal_name, journal_name) config_file = minidom.Document try: config_file = minidom.parse("%s" % config_path) except: # todo: raise exception "error: no config file found" results["config_fetching_error"] = "could not find config file" return results else: cached_parsed_xml_config[journal_name] = config_file for node_path in nodes: node = config_file for node_path_component in node_path.split('/'): # pylint: disable=E1103 # The node variable can be rewritten in the loop and therefore # its type can change. 
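            # getElementsByTagName() returns a NodeList rather than a
            # single node, so before descending one more level we keep
            # only the first match: a query like 'record/rule' therefore
            # reads "all <rule> tags under the first <record> node".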
if node != config_file and node.length > 0: # We have a NodeList object: consider only first child node = node.item(0) # pylint: enable=E1103 try: node = node.getElementsByTagName(node_path_component) except: # WARNING, config did not have such value node = [] break results[node_path] = [] for result in node: try: result_string = result.firstChild.toxml(encoding="utf-8") except: # WARNING, config did not have such value continue results[node_path].append(result_string) return results def get_journal_issue_field(journal_name): """ Returns the MARC field in which this journal expects to find the issue number. Read this from the journal config file Parameters: journal_name - *str* the name of the journal (as used in URLs) """ config_strings = get_xml_from_config(["issue_number"], journal_name) try: issue_field = config_strings["issue_number"][0] except: issue_field = '773__n' return issue_field def get_journal_css_url(journal_name, type='screen'): """ Returns URL to this journal's CSS. Parameters: journal_name - *str* the name of the journal (as used in URLs) type - *str* 'screen' or 'print', depending on the kind of CSS """ config_strings = get_xml_from_config([type], journal_name) css_path = '' try: css_path = config_strings["screen"][0] except Exception: register_exception(req=None, suffix="No css file for journal %s. Is this right?" % \ journal_name) return CFG_SITE_URL + '/' + css_path def get_journal_submission_params(journal_name): """ Returns the (doctype, identifier element, identifier field) for the submission of articles in this journal, so that it is possible to build direct submission links. Parameter: journal_name - *str* the name of the journal (as used in URLs) """ doctype = '' identifier_field = '' identifier_element = '' config_strings = get_xml_from_config(["submission/doctype"], journal_name) if config_strings.get('submission/doctype', ''): doctype = config_strings['submission/doctype'][0] config_strings = get_xml_from_config(["submission/identifier_element"], journal_name) if config_strings.get('submission/identifier_element', ''): identifier_element = config_strings['submission/identifier_element'][0] config_strings = get_xml_from_config(["submission/identifier_field"], journal_name) if config_strings.get('submission/identifier_field', ''): identifier_field = config_strings['submission/identifier_field'][0] else: identifier_field = '037__a' return (doctype, identifier_element, identifier_field) def get_journal_draft_keyword_to_remove(journal_name): """ Returns the keyword that should be removed from the article metadata in order to move the article from Draft to Ready """ config_strings = get_xml_from_config(["draft_keyword"], journal_name) if config_strings.get('draft_keyword', ''): return config_strings['draft_keyword'][0] return '' def get_journal_alert_sender_email(journal_name): """ Returns the email address that should be used as send of the alert email. If not specified, use CFG_SITE_SUPPORT_EMAIL """ config_strings = get_xml_from_config(["alert_sender"], journal_name) if config_strings.get('alert_sender', ''): return config_strings['alert_sender'][0] return CFG_SITE_SUPPORT_EMAIL def get_journal_alert_recipient_email(journal_name): """ Returns the default email address of the recipients of the email Return a string of comma-separated emails. """ if CFG_DEVEL_SITE: # To be on the safe side, do not return the default alert recipients. 
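        # (An alert triggered from a development instance would
        # otherwise risk being mass-mailed to the real production
        # recipient list.)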
return '' config_strings = get_xml_from_config(["alert_recipients"], journal_name) if config_strings.get('alert_recipients', ''): return config_strings['alert_recipients'][0] return '' def get_journal_collection_to_refresh_on_release(journal_name): """ Returns the list of collection to update (WebColl) upon release of an issue. """ from invenio.search_engine import collection_reclist_cache config_strings = get_xml_from_config(["update_on_release/collection"], journal_name) return [coll for coll in config_strings.get('update_on_release/collection', []) if \ collection_reclist_cache.cache.has_key(coll)] def get_journal_index_to_refresh_on_release(journal_name): """ Returns the list of indexed to update (BibIndex) upon release of an issue. """ from invenio.bibindex_engine import get_index_id_from_index_name config_strings = get_xml_from_config(["update_on_release/index"], journal_name) return [index for index in config_strings.get('update_on_release/index', []) if \ get_index_id_from_index_name(index) != ''] def get_journal_template(template, journal_name, ln=CFG_SITE_LANG): """ Returns the journal templates name for the given template type Raise an exception if template cannot be found. """ from invenio.webjournal_config import \ InvenioWebJournalTemplateNotFoundError config_strings = get_xml_from_config([template], journal_name) try: index_page_template = 'webjournal' + os.sep + \ config_strings[template][0] except: raise InvenioWebJournalTemplateNotFoundError(ln, journal_name, template) return index_page_template def get_journal_name_intl(journal_name, ln=CFG_SITE_LANG): """ Returns the nice name of the journal, translated if possible """ _ = gettext_set_language(ln) config_strings = get_xml_from_config(["niceName"], journal_name) if config_strings.get('niceName', ''): return _(config_strings['niceName'][0]) return '' def get_journal_languages(journal_name): """ Returns the list of languages defined for this journal """ config_strings = get_xml_from_config(["languages"], journal_name) if config_strings.get('languages', ''): return [ln.strip() for ln in \ config_strings['languages'][0].split(',')] return [] def get_journal_issue_grouping(journal_name): """ Returns the number of issue that are typically released at the same time. This is used if every two weeks you release an issue that should contains issue of next 2 weeks (eg. at week 16, you relase an issue named '16-17/2009') This number should help in the admin interface to guess how to release the next issue (can be overidden by user). """ config_strings = get_xml_from_config(["issue_grouping"], journal_name) if config_strings.get('issue_grouping', ''): issue_grouping = config_strings['issue_grouping'][0] if issue_grouping.isdigit() and int(issue_grouping) > 0: return int(issue_grouping) return 1 def get_journal_nb_issues_per_year(journal_name): """ Returns the default number of issues per year for this journal. This number should help in the admin interface to guess the next issue number (can be overidden by user). """ config_strings = get_xml_from_config(["issues_per_year"], journal_name) if config_strings.get('issues_per_year', ''): issues_per_year = config_strings['issues_per_year'][0] if issues_per_year.isdigit() and int(issues_per_year) > 0: return int(issues_per_year) return 52 def get_journal_preferred_language(journal_name, ln): """ Returns the most adequate language to display the journal, given a language. 
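
    A minimal doctest-style sketch of the fallback chain, assuming a
    hypothetical journal configured with languages = ['en', 'fr'] and
    CFG_SITE_LANG = 'en':

        >>> get_journal_preferred_language('AtlantisTimes', 'fr')
        'fr'
        >>> get_journal_preferred_language('AtlantisTimes', 'de')
        'en'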
""" languages = get_journal_languages(journal_name) if ln in languages: return ln elif CFG_SITE_LANG in languages: return CFG_SITE_LANG elif languages: return languages else: return CFG_SITE_LANG def get_unreleased_issue_hiding_mode(journal_name): """ Returns how unreleased issue should be treated. Can be one of the following string values: 'future' - only future unreleased issues are hidden. Past unreleased one can be viewed 'all' - any unreleased issue (past and future) have to be hidden - 'none' - no unreleased issue is hidden """ config_strings = get_xml_from_config(["hide_unreleased_issues"], journal_name) if config_strings.get('hide_unreleased_issues', ''): hide_unreleased_issues = config_strings['hide_unreleased_issues'][0] if hide_unreleased_issues in ['future', 'all', 'none']: return hide_unreleased_issues return 'all' def get_first_issue_from_config(journal_name): """ Returns the first issue as defined from config. This should only be useful when no issue have been released. If not specified, returns the issue made of current week number and year. """ config_strings = get_xml_from_config(["first_issue"], journal_name) if config_strings.has_key('first_issue'): return config_strings['first_issue'][0] return time.strftime("%W/%Y", time.localtime()) ######################## TIME / ISSUE FUNCTIONS ###################### def get_current_issue(ln, journal_name): """ Returns the current issue of a journal as a string. Current issue is the latest released issue. """ journal_id = get_journal_id(journal_name, ln) try: current_issue = run_sql("""SELECT issue_number FROM jrnISSUE WHERE date_released <= NOW() AND id_jrnJOURNAL=%s ORDER BY date_released DESC LIMIT 1""", (journal_id,))[0][0] except: # start the first journal ever current_issue = get_first_issue_from_config(journal_name) run_sql("""INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display) VALUES(%s, %s, %s)""", (journal_id, current_issue, current_issue)) return current_issue def get_all_released_issues(journal_name): """ Returns the list of released issue, ordered by release date Note that it only includes the issues that are considered as released in the DB: it will not for example include articles that have been imported in the system but not been released """ journal_id = get_journal_id(journal_name) res = run_sql("""SELECT issue_number FROM jrnISSUE WHERE id_jrnJOURNAL = %s AND UNIX_TIMESTAMP(date_released) != 0 ORDER BY date_released DESC""", (journal_id,)) if res: return [row[0] for row in res] else: return [] def get_next_journal_issues(current_issue_number, journal_name, n=2): """ This function suggests the 'n' next issue numbers """ number, year = current_issue_number.split('/', 1) number = int(number) year = int(year) number_issues_per_year = get_journal_nb_issues_per_year(journal_name) next_issues = [make_issue_number(journal_name, ((number - 1 + i) % (number_issues_per_year)) + 1, year + ((number - 1 + i) / number_issues_per_year)) \ for i in range(1, n + 1)] return next_issues def get_grouped_issues(journal_name, issue_number): """ Returns all the issues grouped with a given one. Issues are sorted from the oldest to newest one. 
""" grouped_issues = [] journal_id = get_journal_id(journal_name, CFG_SITE_LANG) issue_display = get_issue_number_display(issue_number, journal_name) res = run_sql("""SELECT issue_number FROM jrnISSUE WHERE id_jrnJOURNAL=%s AND issue_display=%s""", (journal_id, issue_display)) if res: grouped_issues = [row[0] for row in res] grouped_issues.sort(compare_issues) return grouped_issues def compare_issues(issue1, issue2): """ Comparison function for issues. Returns: -1 if issue1 is older than issue2 0 if issues are equal 1 if issue1 is newer than issue2 """ issue1_number, issue1_year = issue1.split('/', 1) issue2_number, issue2_year = issue2.split('/', 1) if int(issue1_year) == int(issue2_year): return cmp(int(issue1_number), int(issue2_number)) else: return cmp(int(issue1_year), int(issue2_year)) def issue_is_later_than(issue1, issue2): """ Returns true if issue1 is later than issue2 """ issue_number1, issue_year1 = issue1.split('/', 1) issue_number2, issue_year2 = issue2.split('/', 1) if int(issue_year1) > int(issue_year2): return True elif int(issue_year1) == int(issue_year2): return int(issue_number1) > int(issue_number2) else: return False def get_issue_number_display(issue_number, journal_name, ln=CFG_SITE_LANG): """ Returns the display string for a given issue number. """ journal_id = get_journal_id(journal_name, ln) issue_display = run_sql("""SELECT issue_display FROM jrnISSUE WHERE issue_number=%s AND id_jrnJOURNAL=%s""", (issue_number, journal_id)) if issue_display: return issue_display[0][0] else: # Not yet released... return issue_number def make_issue_number(journal_name, number, year, for_url_p=False): """ Creates a normalized issue number representation with given issue number (as int or str) and year (as int or str). Reverse the year and number if for_url_p is True """ number_issues_per_year = get_journal_nb_issues_per_year(journal_name) precision = len(str(number_issues_per_year)) number = int(str(number)) year = int(str(year)) if for_url_p: return ("%i/%0" + str(precision) + "i") % \ (year, number) else: return ("%0" + str(precision) + "i/%i") % \ (number, year) def get_release_datetime(issue, journal_name, ln=CFG_SITE_LANG): """ Gets the date at which an issue was released from the DB. Returns None if issue has not yet been released. See issue_to_datetime() to get the *theoretical* release time of an issue. """ journal_id = get_journal_id(journal_name, ln) try: release_date = run_sql("""SELECT date_released FROM jrnISSUE WHERE issue_number=%s AND id_jrnJOURNAL=%s""", (issue, journal_id))[0][0] except: return None if release_date: return release_date else: return None def get_announcement_datetime(issue, journal_name, ln=CFG_SITE_LANG): """ Get the date at which an issue was announced through the alert system. Return None if not announced """ journal_id = get_journal_id(journal_name, ln) try: announce_date = run_sql("""SELECT date_announced FROM jrnISSUE WHERE issue_number=%s AND id_jrnJOURNAL=%s""", (issue, journal_id))[0][0] except: return None if announce_date: return announce_date else: return None def datetime_to_issue(issue_datetime, journal_name): """ Returns the issue corresponding to the given datetime object. If issue_datetime is too far in the future or in the past, gives the best possible matching issue, or None, if it does not seem to exist. #If issue_datetime is too far in the future, return the latest #released issue. 
#If issue_datetime is too far in the past, return None Parameters: issue_datetime - *datetime* date of the issue to be retrieved journal_name - *str* the name of the journal (as used in URLs) """ issue_number = None journal_id = get_journal_id(journal_name) # Try to discover how much days an issue is valid nb_issues_per_year = get_journal_nb_issues_per_year(journal_name) this_year_number_of_days = 365 if calendar.isleap(issue_datetime.year): this_year_number_of_days = 366 issue_day_lifetime = math.ceil(float(this_year_number_of_days)/nb_issues_per_year) res = run_sql("""SELECT issue_number, date_released FROM jrnISSUE WHERE date_released < %s AND id_jrnJOURNAL = %s ORDER BY date_released DESC LIMIT 1""", (issue_datetime, journal_id)) if res and res[0][1]: issue_number = res[0][0] issue_release_date = res[0][1] # Check that the result is not too far in the future: if issue_release_date + datetime.timedelta(issue_day_lifetime) < issue_datetime: # In principle, the latest issue will no longer be valid # at that time return None else: # Mmh, are we too far in the past? This can happen in the case # of articles that have been imported in the system but never # considered as 'released' in the database. So we should still # try to approximate/match an issue: if round(issue_day_lifetime) in [6, 7, 8]: # Weekly issues. We can use this information to better # match the issue number issue_nb = int(issue_datetime.strftime('%W')) # = week number else: # Compute the number of days since beginning of year, and # divide by the lifetime of an issue: we get the # approximate issue_number issue_nb = math.ceil((int(issue_datetime.strftime('%j')) / issue_day_lifetime)) issue_number = ("%0" + str(len(str(nb_issues_per_year)))+ "i/%i") % (issue_nb, issue_datetime.year) # Now check if this issue exists in the system for this # journal if not get_journal_categories(journal_name, issue_number): # This issue did not exist return None return issue_number DAILY = 1 WEEKLY = 2 MONTHLY = 3 def issue_to_datetime(issue_number, journal_name, granularity=None): """ Returns the *theoretical* date of release for given issue: useful if you release on Friday, but the issue date of the journal should correspond to the next Monday. This will correspond to the next day/week/month, depending on the number of issues per year (or the 'granularity' if specified) and the release time (if close to the end of a period defined by the granularity, consider next period since release is made a bit in advance). See get_release_datetime() for the *real* release time of an issue THIS FUNCTION SHOULD ONLY BE USED FOR INFORMATIVE DISPLAY PURPOSE, AS IT GIVES APPROXIMATIVE RESULTS. Do not use it to make decisions. Parameters: issue_number - *str* issue number to consider journal_name - *str* the name of the journal (as used in URLs) granularity - *int* the granularity to consider """ # If we have released, we can use this information. Otherwise we # have to approximate. 
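    # Worked example of the approximation below: for a weekly journal
    # (nb_issues_per_year = 52) an issue "lives" 365/52 ~= 7.02 days,
    # so issue '30/2009' is first mapped to 1 Jan 2009 + round(29 *
    # 7.02) = 204 days, before being snapped to a week boundary by the
    # granularity logic further down.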
issue_date = get_release_datetime(issue_number, journal_name) if not issue_date: # Approximate release date number, year = issue_number.split('/') number = int(number) year = int(year) nb_issues_per_year = get_journal_nb_issues_per_year(journal_name) this_year_number_of_days = 365 if calendar.isleap(year): this_year_number_of_days = 366 issue_day_lifetime = float(this_year_number_of_days)/nb_issues_per_year # Compute from beginning of the year issue_date = datetime.datetime(year, 1, 1) + \ datetime.timedelta(days=int(round((number - 1) * issue_day_lifetime))) # Okay, but if last release is not too far in the past, better # compute from the release. current_issue = get_current_issue(CFG_SITE_LANG, journal_name) current_issue_time = get_release_datetime(current_issue, journal_name) if current_issue_time.year == issue_date.year: current_issue_number, current_issue_year = current_issue.split('/') current_issue_number = int(current_issue_number) # Compute from last release issue_date = current_issue_time + \ datetime.timedelta(days=int((number - current_issue_number) * issue_day_lifetime)) # If granularity is not specifed, deduce from config if granularity is None: nb_issues_per_year = get_journal_nb_issues_per_year(journal_name) if nb_issues_per_year > 250: granularity = DAILY elif nb_issues_per_year > 40: granularity = WEEKLY else: granularity = MONTHLY # Now we can adapt the date to match the granularity if granularity == DAILY: if issue_date.hour >= 15: # If released after 3pm, consider it is the issue of the next # day issue_date = issue_date + datetime.timedelta(days=1) elif granularity == WEEKLY: (year, week_nb, day_nb) = issue_date.isocalendar() if day_nb > 4: # If released on Fri, Sat or Sun, consider that it is next # week's issue. issue_date = issue_date + datetime.timedelta(weeks=1) # Get first day of the week issue_date = issue_date - datetime.timedelta(days=issue_date.weekday()) else: if issue_date.day > 22: # If released last week of the month, consider release for # next month issue_date = issue_date.replace(month=issue_date.month+1) date_string = issue_date.strftime("%Y %m 1") issue_date = datetime.datetime(*(time.strptime(date_string, "%Y %m %d")[0:6])) return issue_date def get_number_of_articles_for_issue(issue, journal_name, ln=CFG_SITE_LANG): """ Function that returns a dictionary with all categories and number of articles in each category. """ all_articles = {} categories = get_journal_categories(journal_name, issue) for category in categories: all_articles[category] = len(get_journal_articles(journal_name, issue, category)) return all_articles ########################## JOURNAL RELATED ########################### def get_journal_info_path(journal_name): """ Returns the path to the info file of the given journal. The info file should be used to get information about a journal when database is not available. 
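    For a hypothetical journal named 'AtlantisTimes' this resolves to
    CFG_CACHEDIR/webjournal/AtlantisTimes/info.dat, provided the
    normalized path stays below the webjournal cache directory.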
Returns None if path cannot be determined """ # We must make sure we don't try to read outside of webjournal # cache dir info_path = os.path.abspath("%s/webjournal/%s/info.dat" % \ (CFG_CACHEDIR, journal_name)) if info_path.startswith(CFG_CACHEDIR + '/webjournal/'): return info_path else: return None def get_journal_article_cache_path(journal_name, issue): """ Returns the path to cache file of the articles of a given issue Returns None if path cannot be determined """ # We must make sure we don't try to read outside of webjournal # cache dir cache_path = os.path.abspath("%s/webjournal/%s/%s_articles_cache.dat" % \ (CFG_CACHEDIR, journal_name, issue.replace('/', '_'))) if cache_path.startswith(CFG_CACHEDIR + '/webjournal/'): return cache_path else: return None def get_journal_id(journal_name, ln=CFG_SITE_LANG): """ Get the id for this journal from the DB. If DB is down, try to get from cache. """ journal_id = None from invenio.webjournal_config import InvenioWebJournalJournalIdNotFoundDBError if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: # do not connect to the database as the site is closed for # maintenance: journal_info_path = get_journal_info_path(journal_name) try: journal_info_file = open(journal_info_path, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) except cPickle.PickleError, e: journal_id = None except IOError: journal_id = None except ValueError: journal_id = None else: try: res = run_sql("SELECT id FROM jrnJOURNAL WHERE name=%s", (journal_name,)) if len(res) > 0: journal_id = res[0][0] except OperationalError, e: # Cannot connect to database. Try to read from cache journal_info_path = get_journal_info_path(journal_name) try: journal_info_file = open(journal_info_path, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info['journal_id'] except cPickle.PickleError, e: journal_id = None except IOError: journal_id = None except ValueError: journal_id = None if journal_id is None: raise InvenioWebJournalJournalIdNotFoundDBError(ln, journal_name) return journal_id def guess_journal_name(ln, journal_name=None): """ Tries to take a guess what a user was looking for on the server if not providing a name for the journal, or if given journal name does not match case of original journal. """ from invenio.webjournal_config import InvenioWebJournalNoJournalOnServerError from invenio.webjournal_config import InvenioWebJournalNoNameError journals_id_and_names = get_journals_ids_and_names() if len(journals_id_and_names) == 0: raise InvenioWebJournalNoJournalOnServerError(ln) elif not journal_name and \ journals_id_and_names[0].has_key('journal_name'): return journals_id_and_names[0]['journal_name'] elif len(journals_id_and_names) > 0: possible_journal_names = [journal_id_and_name['journal_name'] for journal_id_and_name \ in journals_id_and_names \ if journal_id_and_name.get('journal_name', '').lower() == journal_name.lower()] if possible_journal_names: return possible_journal_names[0] else: raise InvenioWebJournalNoNameError(ln) else: raise InvenioWebJournalNoNameError(ln) def get_journals_ids_and_names(): """ Returns the list of existing journals IDs and names. Try to read from the DB, or from cache if DB is not accessible. 
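
    Example of returned value (IDs and names are hypothetical):

        >>> get_journals_ids_and_names()
        [{'journal_id': 1, 'journal_name': 'AtlantisTimes'}]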
""" journals = [] if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: # do not connect to the database as the site is closed for # maintenance: files = os.listdir("%s/webjournal" % CFG_CACHEDIR) info_files = [path + os.sep + 'info.dat' for path in files if \ os.path.isdir(path) and \ os.path.exists(path + os.sep + 'info.dat')] for info_file in info_files: try: journal_info_file = open(info_file, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) journal_name = journal_info.get('journal_name', None) current_issue = journal_info.get('current_issue', None) if journal_id is not None and \ journal_name is not None: journals.append({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': current_issue}) except cPickle.PickleError, e: # Well, can't do anything... continue except IOError: # Well, can't do anything... continue except ValueError: continue else: try: res = run_sql("SELECT id, name FROM jrnJOURNAL ORDER BY id") for journal_id, journal_name in res: journals.append({'journal_id': journal_id, 'journal_name': journal_name}) except OperationalError, e: # Cannot connect to database. Try to read from cache files = os.listdir("%s/webjournal" % CFG_CACHEDIR) info_files = [path + os.sep + 'info.dat' for path in files if \ os.path.isdir(path) and \ os.path.exists(path + os.sep + 'info.dat')] for info_file in info_files: try: journal_info_file = open(info_file, 'r') journal_info = cPickle.load(journal_info_file) journal_id = journal_info.get('journal_id', None) journal_name = journal_info.get('journal_name', None) current_issue = journal_info.get('current_issue', None) if journal_id is not None and \ journal_name is not None: journals.append({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': current_issue}) except cPickle.PickleError, e: # Well, can't do anything... continue except IOError: # Well, can't do anything... continue except ValueError: continue return journals def parse_url_string(uri): """ Centralized function to parse any url string given in webjournal. Useful to retrieve current category, journal, etc. from within format elements The webjournal interface handler should already have cleaned the URI beforehand, so that journal name exist, issue number is correct, etc. The only remaining problem might be due to the capitalization of journal name in contact, search and popup pages, so clean the journal name. Note that language is also as returned from the URL, which might need to be filtered to match available languages (WebJournal elements can rely in bfo.lang to retrieve washed language) returns: args: all arguments in dict form """ args = {'journal_name' : '', 'issue_year' : '', 'issue_number' : None, 'issue' : None, 'category' : '', 'recid' : -1, 'verbose' : 0, 'ln' : CFG_SITE_LANG, 'archive_year' : None, 'archive_search': ''} if not uri.startswith('/journal'): # Mmh, incorrect context. 
Still, keep language if available url_params = urlparse(uri)[4] args['ln'] = dict([part.split('=') for part in url_params.split('&') \ if len(part.split('=')) == 2]).get('ln', CFG_SITE_LANG) return args # Take everything after journal and before first question mark splitted_uri = uri.split('journal', 1) second_part = splitted_uri[1] splitted_uri = second_part.split('?') uri_middle_part = splitted_uri[0] uri_arguments = '' if len(splitted_uri) > 1: uri_arguments = splitted_uri[1] arg_list = uri_arguments.split("&") args['ln'] = CFG_SITE_LANG args['verbose'] = 0 for arg_pair in arg_list: arg_and_value = arg_pair.split('=') if len(arg_and_value) == 2: if arg_and_value[0] == 'ln': args['ln'] = arg_and_value[1] elif arg_and_value[0] == 'verbose' and \ arg_and_value[1].isdigit(): args['verbose'] = int(arg_and_value[1]) elif arg_and_value[0] == 'archive_year' and \ arg_and_value[1].isdigit(): args['archive_year'] = int(arg_and_value[1]) elif arg_and_value[0] == 'archive_search': args['archive_search'] = arg_and_value[1] elif arg_and_value[0] == 'name': args['journal_name'] = guess_journal_name(args['ln'], arg_and_value[1]) arg_list = uri_middle_part.split("/") if len(arg_list) > 1 and arg_list[1] not in ['search', 'contact', 'popup']: args['journal_name'] = urllib.unquote(arg_list[1]) elif arg_list[1] not in ['search', 'contact', 'popup']: args['journal_name'] = guess_journal_name(args['ln'], args['journal_name']) cur_issue = get_current_issue(args['ln'], args['journal_name']) if len(arg_list) > 2: try: args['issue_year'] = int(urllib.unquote(arg_list[2])) except: args['issue_year'] = int(cur_issue.split('/')[1]) else: args['issue'] = cur_issue args['issue_year'] = int(cur_issue.split('/')[1]) args['issue_number'] = int(cur_issue.split('/')[0]) if len(arg_list) > 3: try: args['issue_number'] = int(urllib.unquote(arg_list[3])) except: args['issue_number'] = int(cur_issue.split('/')[0]) args['issue'] = make_issue_number(args['journal_name'], args['issue_number'], args['issue_year']) if len(arg_list) > 4: args['category'] = urllib.unquote(arg_list[4]) if len(arg_list) > 5: try: args['recid'] = int(urllib.unquote(arg_list[5])) except: pass args['ln'] = get_journal_preferred_language(args['journal_name'], args['ln']) # FIXME : wash arguments? 
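    # e.g. '/journal/AtlantisTimes/2009/03/News/?ln=en' (hypothetical
    # journal and category, weekly numbering assumed for the zero
    # padding) ends up as: journal_name='AtlantisTimes',
    # issue_year=2009, issue_number=3, issue='03/2009',
    # category='News', recid=-1, ln='en'.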
return args def make_journal_url(current_uri, custom_parameters=None): """ Create a URL, using the current URI and overriding values with the given custom_parameters Parameters: current_uri - *str* the current full URI custom_parameters - *dict* a dictionary of parameters that should override those of curent_uri """ if not custom_parameters: custom_parameters = {} default_params = parse_url_string(current_uri) for key, value in custom_parameters.iteritems(): # Override default params with custom params default_params[key] = str(value) uri = CFG_SITE_URL + '/journal/' if default_params['journal_name']: uri += urllib.quote(default_params['journal_name']) + '/' if default_params['issue_year'] and default_params['issue_number']: uri += make_issue_number(default_params['journal_name'], default_params['issue_number'], default_params['issue_year'], for_url_p=True) + '/' if default_params['category']: uri += urllib.quote(default_params['category']) if default_params['recid'] and \ default_params['recid'] != -1: uri += '/' + str(default_params['recid']) printed_question_mark = False if default_params['ln']: uri += '?ln=' + default_params['ln'] printed_question_mark = True if default_params['verbose'] != 0: if printed_question_mark: uri += '&verbose=' + str(default_params['verbose']) else: uri += '?verbose=' + str(default_params['verbose']) return uri ############################ HTML CACHING FUNCTIONS ############################ def cache_index_page(html, journal_name, category, issue, ln): """ Caches the index page main area of a Bulletin (right hand menu cannot be cached) """ issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop caching return False cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name) if not os.path.isdir(cache_path_dir): os.makedirs(cache_path_dir) cached_file = open(cache_path, "w") cached_file.write(html) cached_file.close() def get_index_page_from_cache(journal_name, category, issue, ln): """ Function to get an index page from the cache. False if not in cache. """ issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop reading cache return False try: cached_file = open(cache_path).read() except: return False return cached_file def cache_article_page(html, journal_name, category, recid, issue, ln): """ Caches an article view of a journal. """ issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, recid, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop caching return cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name) if not os.path.isdir(cache_path_dir): os.makedirs(cache_path_dir) cached_file = open(cache_path, "w") cached_file.write(html) cached_file.close() def get_article_page_from_cache(journal_name, category, recid, issue, ln): """ Gets an article view of a journal from cache. False if not in cache. 
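
    The cache entry is looked up under
    CFG_CACHEDIR/webjournal/<journal_name>/<issue>_article_<category>_<recid>_<ln>.html,
    with '/' replaced by '_' in the issue and spaces stripped from the
    category.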
""" issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \ (CFG_CACHEDIR, journal_name, issue, category, recid, ln)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop reading cache return False try: cached_file = open(cache_path).read() except: return False return cached_file def clear_cache_for_article(journal_name, category, recid, issue): """ Resets the cache for an article (e.g. after an article has been modified) """ issue = issue.replace("/", "_") category = category.replace(" ", "") cache_path = os.path.abspath('%s/webjournal/%s/' % (CFG_CACHEDIR, journal_name)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop deleting cache return False # try to delete the article cached file try: os.remove('%s/webjournal/%s/%s_article_%s_%s_en.html' % (CFG_CACHEDIR, journal_name, issue, category, recid)) except: pass try: os.remove('%s/webjournal/%s/%s_article_%s_%s_fr.html' % (CFG_CACHEDIR, journal_name, issue, category, recid)) except: pass # delete the index page for the category try: os.remove('%s/webjournal/%s/%s_index_%s_en.html' % (CFG_CACHEDIR, journal_name, issue, category)) except: pass try: os.remove('%s/webjournal/%s/%s_index_%s_fr.html' % (CFG_CACHEDIR, journal_name, issue, category)) except: pass try: path = get_journal_article_cache_path(journal_name, issue) os.remove(path) except: pass return True def clear_cache_for_issue(journal_name, issue): """ clears the cache of a whole issue. """ issue = issue.replace("/", "_") cache_path_dir = os.path.abspath('%s/webjournal/%s' % \ (CFG_CACHEDIR, journal_name)) if not cache_path_dir.startswith(CFG_CACHEDIR + '/webjournal'): # Mmh, not accessing correct path. Stop deleting cache return False all_cached_files = os.listdir(cache_path_dir) non_deleted = [] for cached_file in all_cached_files: if cached_file.startswith(issue.replace('/', '_')): try: os.remove(cache_path_dir + '/' + cached_file) except: return False else: non_deleted.append(cached_file) return True ######################### CERN SPECIFIC FUNCTIONS ################# def get_recid_from_legacy_number(issue_number, category, number): """ Returns the recid based on the issue number, category and 'number'. This is used to support URLs using the now deprecated 'number' argument. The function tries to reproduce the behaviour of the old way of doing, even keeping some of its 'problems' (so that we reach the same article as before with a given number).. Returns the recid as int, or -1 if not found """ recids = [] if issue_number[0] == "0": alternative_issue_number = issue_number[1:] recids = list(search_pattern(p='65017a:"%s" and 773__n:%s' % (category, issue_number))) recids.extend(list(search_pattern(p='65017a:"%s" and 773__n:%s' % (category, alternative_issue_number)))) else: recids = list(search_pattern(p='65017:"%s" and 773__n:%s' % (category, issue_number))) # Now must order the records and pick the one at index 'number'. # But we have to take into account that there can be multiple # records at position 1, and that these additional records should # be numbered with negative numbers: # 1, 1, 1, 2, 3 -> 1, -1, -2, 2, 3... 
negative_index_records = {} positive_index_records = {} # Fill in 'negative_index_records' and 'positive_index_records' # lists with the following loop for recid in recids: bfo = BibFormatObject(recid) order = [subfield['c'] for subfield in bfo.fields('773__') if \ issue_number in subfield.get('n', '')] if len(order) > 0: # If several orders are defined for the same article and # the same issue, keep the first one order = order[0] if order.isdigit(): # Order must be an int. Otherwise skip order = int(order) if order == 1 and positive_index_records.has_key(1): # This is then a negative number for this record index = (len(negative_index_records.keys()) > 0 and \ min(negative_index_records.keys()) -1) or 0 negative_index_records[index] = recid else: # Positive number for this record if not positive_index_records.has_key(order): positive_index_records[order] = recid else: # We make the assumption that we cannot have # twice the same position for two # articles. Previous WebJournal module was not # clear about that. Just drop this record # (better than crashing or looping forever..) pass recid_to_return = -1 # Ok, we can finally pick the recid corresponding to 'number' if number <= 0: negative_indexes = negative_index_records.keys() negative_indexes.sort() negative_indexes.reverse() if len(negative_indexes) > abs(number): recid_to_return = negative_index_records[negative_indexes[abs(number)]] else: if positive_index_records.has_key(number): recid_to_return = positive_index_records[number] return recid_to_return def is_recid_in_released_issue(recid): """ Returns True if recid is part of the latest issue of the given journal. WARNING: the function does not check that the article does not belong to the draft collection of the record. This is wanted, in order to workaround the time needed for a record to go from the draft collection to the final collection """ bfo = BibFormatObject(recid) journal_name = '' journal_names = [journal_name for journal_name in bfo.fields('773__t') if journal_name] if journal_names: journal_name = journal_names[0] else: return False existing_journal_names = [o['journal_name'] for o in get_journals_ids_and_names()] if not journal_name in existing_journal_names: # Try to remove whitespace journal_name = journal_name.replace(' ', '') if not journal_name in existing_journal_names: # Journal name unknown from WebJournal return False config_strings = get_xml_from_config(["draft_image_access_policy"], journal_name) if config_strings['draft_image_access_policy'] and \ config_strings['draft_image_access_policy'][0] != 'allow': # The journal does not want to optimize access to images return False article_issues = bfo.fields('773__n') current_issue = get_current_issue(CFG_SITE_LANG, journal_name) for article_issue in article_issues: # Check each issue until a released one is found if get_release_datetime(article_issue, journal_name): # Release date exists, issue has been released return True else: # Unreleased issue. Do we still allow based on journal config? unreleased_issues_mode = get_unreleased_issue_hiding_mode(journal_name) if (unreleased_issues_mode == 'none' or \ (unreleased_issues_mode == 'future' and \ not issue_is_later_than(article_issue, current_issue))): return True return False diff --git a/modules/webjournal/lib/webjournaladminlib.py b/modules/webjournal/lib/webjournaladminlib.py index 477d2540f..b06670702 100644 --- a/modules/webjournal/lib/webjournaladminlib.py +++ b/modules/webjournal/lib/webjournaladminlib.py @@ -1,957 +1,957 @@ ## This file is part of Invenio. 
## Copyright (C) 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable=C0301 """Invenio WebJournal Administration Interface.""" __revision__ = "$Id$" import sys import cPickle import re import os -from urllib2 import urlopen if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.errorlib import register_exception from invenio.config import \ CFG_SITE_URL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_ETCDIR, \ CFG_CACHEDIR, \ CFG_TMPSHAREDDIR, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_RECORD from invenio.messages import gettext_set_language from invenio.mailutils import send_email from invenio.access_control_engine import acc_authorize_action from invenio.webjournal_config import \ InvenioWebJournalJournalIdNotFoundDBError, \ InvenioWebJournalReleaseUpdateError, \ InvenioWebJournalNoJournalOnServerError from invenio.webjournal_utils import \ get_journals_ids_and_names, \ guess_journal_name, \ get_current_issue, \ get_issue_number_display, \ get_featured_records, \ add_featured_record, \ remove_featured_record, \ clear_cache_for_issue, \ get_next_journal_issues, \ get_release_datetime, \ get_journal_id, \ compare_issues, \ get_journal_info_path, \ get_journal_css_url, \ get_journal_alert_sender_email, \ get_journal_alert_recipient_email, \ get_journal_draft_keyword_to_remove, \ get_journal_categories, \ get_journal_articles, \ get_grouped_issues, \ get_journal_issue_grouping, \ get_journal_languages, \ get_journal_collection_to_refresh_on_release, \ get_journal_index_to_refresh_on_release, \ - issue_is_later_than + issue_is_later_than, \ + WEBJOURNAL_OPENER from invenio.dbquery import run_sql from invenio.bibrecord import \ create_record, \ print_rec from invenio.bibformat import format_record from invenio.bibtask import task_low_level_submission, bibtask_allocate_sequenceid from invenio.search_engine import get_all_collections_of_a_record import invenio.template wjt = invenio.template.load('webjournal') def getnavtrail(previous = ''): """Get the navtrail""" navtrail = """Admin Area """ % (CFG_SITE_URL,) navtrail = navtrail + previous return navtrail def perform_index(ln=CFG_SITE_LANG, journal_name=None, action=None, uid=None): """ Index page Lists the journals, and offers options to edit them, delete them or add new journal. Parameters: journal_name - the journal affected by action, if any action - one of ['', 'askDelete', _('Delete'), _('Cancel')] ln - language uid - user id """ _ = gettext_set_language(ln) msg = None if action == 'askDelete' and journal_name is not None: msg = '''
Delete Journal Configuration
Are you sure you want to delete the configuration of %(journal_name)s?
    ''' % {'CFG_SITE_URL': CFG_SITE_URL, 'journal_name': journal_name, 'delete': _("Delete"), 'cancel': _("Cancel")} if action == _("Delete") and journal_name is not None: # User confirmed and clicked on "Delete" button remove_journal(journal_name) journals = get_journals_ids_and_names() # Only keep journal that user can view or edit journals = [(journal_info, acc_authorize_action(uid, 'cfgwebjournal', name=journal_info['journal_name'], with_editor_rights='yes')[0] == 0) \ for journal_info in journals \ if acc_authorize_action(uid, 'cfgwebjournal', name=journal_info['journal_name'])[0] == 0] return wjt.tmpl_admin_index(ln=ln, journals=journals, msg=msg) def perform_administrate(ln=CFG_SITE_LANG, journal_name=None, as_editor=True): """ Administration of a journal Show the current and next issues/publications, and display links to more specific administrative pages. Parameters: journal_name - the journal to be administrated ln - language with_editor_rights - True if can edit configuration. Read-only mode otherwise """ if journal_name is None: try: journal_name = guess_journal_name(ln) except InvenioWebJournalNoJournalOnServerError, e: return e.user_box() if not can_read_xml_config(journal_name): return 'Configuration could not be read. Please check that %s/webjournal/%s/%s-config.xml exists and can be read by the server.
    ' % (CFG_ETCDIR, journal_name, journal_name) current_issue = get_current_issue(ln, journal_name) current_publication = get_issue_number_display(current_issue, journal_name, ln) issue_list = get_grouped_issues(journal_name, current_issue) next_issue_number = get_next_journal_issues(issue_list[-1], journal_name, 1) return wjt.tmpl_admin_administrate(journal_name, current_issue, current_publication, issue_list, next_issue_number[0], ln, as_editor=as_editor) def perform_feature_record(journal_name, recid, img_url='', action='', ln=CFG_SITE_LANG): """ Interface to feature a record Used to list, add and remove featured records of the journal. Parameters: journal_name - the journal for which the article is featured recid - the record affected by 'action' img_url - the URL to image displayed with given record (only when action == 'add') action - One of ['', 'add', 'askremove', _('Remove'), _('Cancel')] ln - language """ _ = gettext_set_language(ln) if action == 'add': result = add_featured_record(journal_name, recid, img_url) if result == 0: msg ='''Successfully featured record %(recid)s. Go to the %(name)s journal to see the result.''' % {'CFG_SITE_URL': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'name': journal_name, 'recid': recid} elif result == 1: msg = '''record %(recid)s is already featured. Choose another one or remove it first.''' % \ {'CFG_SITE_URL': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'recid': recid} else: msg = '''Record could not be featured. Check file permission.''' featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) elif action == 'askremove': msg = '''
Remove featured record
Are you sure you want to remove record %(recid)s from the list of featured records?
    ''' % \ {'CFG_SITE_URL': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'name': journal_name, 'recid': recid, 'cancel': _("Cancel"), 'remove': _("Remove")} featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) elif action == _("Remove"): result = remove_featured_record(journal_name, recid) msg = '''Record %(recid)s has been removed.''' % \ {'CFG_SITE_URL': CFG_SITE_URL, 'CFG_SITE_RECORD': CFG_SITE_RECORD, 'recid': recid} featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) else: msg = '''Here you can choose which records from the %s should be featured on the journal webpage.''' % CFG_SITE_NAME featured_records = get_featured_records(journal_name) return wjt.tmpl_admin_feature_record(ln=ln, journal_name=journal_name, featured_records=featured_records, msg=msg) def perform_regenerate_issue(issue, journal_name, ln=CFG_SITE_LANG, confirmed_p=False, publish_draft_articles_p=False): """ Clears the cache for the given issue. Parameters: journal_name - the journal for which the cache should be deleted issue - the issue for which the cache should be deleted ln - language confirmed_p - if True, regenerate. Else ask confirmation publish_draft_articles_p - should the remaining draft articles in the issue be made public? """ if not confirmed_p: # Ask user confirmation about the regeneration current_issue = get_current_issue(ln, journal_name) issue_released_p = not issue_is_later_than(issue, current_issue) return wjt.tmpl_admin_regenerate_confirm(ln, journal_name, issue, issue_released_p) else: # Regenerate the issue (clear the cache) success = clear_cache_for_issue(journal_name, issue) if publish_draft_articles_p: current_issue = get_current_issue(ln, journal_name) if not issue_is_later_than(issue, current_issue): # This issue is already released: we can safely publish # the articles. Otherwise we'll refuse to publish the drafts move_drafts_articles_to_ready(journal_name, issue) if success: return wjt.tmpl_admin_regenerate_success(ln, journal_name, issue) else: return wjt.tmpl_admin_regenerate_error(ln, journal_name, issue) def perform_request_issue_control(journal_name, issues, action, ln=CFG_SITE_LANG): """ Central logic for issue control. Regenerates the flat files 'current_issue' and 'issue_group' of the journal that control which issue is currently active for the journal. Parameters: journal_name - the journal affected by 'action' issues - list of issues affected by 'action' TODO: check action - One of ['cfg', _('Add'), _('Refresh'), _('Publish'), _('Update')] ln - language """ _ = gettext_set_language(ln) out = '' if action == "cfg" or action == _("Refresh") or action == _("Add"): # find out if we are in update or release current_issue = get_current_issue(ln, journal_name) grouped_issues = get_grouped_issues(journal_name, current_issue) if current_issue != grouped_issues[-1]: # The current issue has "pending updates", i.e. is grouped # with unreleased issues. 
Propose to update these issues next_issue = grouped_issues[grouped_issues.index(current_issue) + 1] out = wjt.tmpl_admin_update_issue(ln, journal_name, next_issue, current_issue) else: # Propose a release next_issues = get_next_journal_issues(current_issue, journal_name, n=get_journal_issue_grouping(journal_name)) if action == _("Refresh"): next_issues += issues next_issues = list(set(next_issues))# avoid double entries elif action == _("Add"): next_issues += issues next_issues = list(set(next_issues))# avoid double entries next_issues.sort(compare_issues) highest_issue_so_far = next_issues[-1] one_more_issue = get_next_journal_issues(highest_issue_so_far, journal_name, 1) next_issues += one_more_issue next_issues = list(set(next_issues)) # avoid double entries else: # get the next issue numbers to publish next_issues = get_next_journal_issues(current_issue, journal_name, n=get_journal_issue_grouping(journal_name)) next_issues.sort(compare_issues) out = wjt.tmpl_admin_control_issue(ln, journal_name, next_issues) elif action == _("Publish"): # Publish the given issues (mark them as current issues) publish_issues = issues publish_issues = list(set(publish_issues)) # avoid double entries publish_issues.sort(compare_issues) if len(publish_issues) == 0: # User did not select an issue current_issue = get_current_issue(ln, journal_name) next_issues = get_next_journal_issues(current_issue, journal_name, n=get_journal_issue_grouping(journal_name)) out = '

    ' + \ _('Please select an issue') + '

    ' out += wjt.tmpl_admin_control_issue(ln, journal_name, next_issues) return out try: release_journal_issue(publish_issues, journal_name, ln) except InvenioWebJournalJournalIdNotFoundDBError, e: register_exception(req=None) return e.user_box() out = wjt.tmpl_admin_control_issue_success_msg(ln, publish_issues, journal_name) elif action == _("Update"): try: try: update_issue = issues[0] except: raise InvenioWebJournalReleaseUpdateError(ln, journal_name) except InvenioWebJournalReleaseUpdateError, e: register_exception(req=None) return e.user_box() try: release_journal_update(update_issue, journal_name, ln) except InvenioWebJournalJournalIdNotFoundDBError, e: register_exception(req=None) return e.user_box() out = wjt.tmpl_admin_updated_issue_msg(ln, update_issue, journal_name) return out def perform_request_alert(journal_name, issue, sent, plain_text, subject, recipients, html_mail, force, ln=CFG_SITE_LANG): """ All the logic for alert emails. Display a form to edit email/recipients and options to send the email. Sent in HTML/PlainText or only PlainText if wished so. Also prevent mistake of sending the alert more than one for a particular issue. Parameters: journal_name - the journal for which the alert is sent issue - the issue for which the alert is sent sent - Display interface to edit email if "False" (string). Else send the email. plain_text - the text of the mail subject - the subject of the mail recipients - the recipients of the mail (string with comma-separated emails) html_mail - if 'html', also send email as HTML (copying from the current issue on the web) force - if different than "False", the email is sent even if it has already been sent. ln - language """ # FIXME: more flexible options to choose the language of the alert languages = get_journal_languages(journal_name) if languages: alert_ln = languages[0] else: alert_ln = CFG_SITE_LANG if not get_release_datetime(issue, journal_name, ln): # Trying to send an alert for an unreleased issue return wjt.tmpl_admin_alert_unreleased_issue(ln, journal_name) if sent == "False": # Retrieve default message, subject and recipients, and # display email editor subject = wjt.tmpl_admin_alert_subject(journal_name, alert_ln, issue) plain_text = wjt.tmpl_admin_alert_plain_text(journal_name, alert_ln, issue) plain_text = plain_text.encode('utf-8') recipients = get_journal_alert_recipient_email(journal_name) return wjt.tmpl_admin_alert_interface(ln, journal_name, subject, plain_text, recipients, alert_ln) else: # User asked to send the mail if was_alert_sent_for_issue(issue, journal_name, ln) != False and force == "False": # Mmh, email already sent before for this issue. 
Ask # confirmation return wjt.tmpl_admin_alert_was_already_sent(ln, journal_name, subject, plain_text, recipients, html_mail, issue) html_string = None if html_mail == "html": # Also send as HTML: retrieve from current issue - html_file = urlopen('%s/journal/%s?ln=%s' + html_file = WEBJOURNAL_OPENER.open('%s/journal/%s?ln=%s' % (CFG_SITE_URL, journal_name, alert_ln)) html_string = html_file.read() html_file.close() html_string = put_css_in_file(html_string, journal_name) html_string = insert_journal_link(html_string, journal_name, issue, ln) sender_email = get_journal_alert_sender_email(journal_name) send_email(sender_email, recipients, subject, plain_text, html_string, header='', footer='', html_header='', html_footer='', charset='utf-8') update_DB_for_alert(issue, journal_name, ln) return wjt.tmpl_admin_alert_success_msg(ln, journal_name) def perform_request_configure(journal_name, xml_config, action, ln=CFG_SITE_LANG): """ Add a new journal or configure the settings of an existing journal. Parameters: journal_name - the journal to configure, or name of the new journal xml_config - the xml configuration of the journal (string) action - One of ['edit', 'editDone', 'add', 'addDone'] ln - language """ msg = None if action == 'edit': # Read existing config if journal_name is not None: if not can_read_xml_config(journal_name): return 'Configuration could not be read. Please check that %s/webjournal/%s/%s-config.xml exists and can be read by the server.
    ' % (CFG_ETCDIR, journal_name, journal_name) config_path = '%s/webjournal/%s/%s-config.xml' % (CFG_ETCDIR, journal_name, journal_name) xml_config = file(config_path).read() else: # cannot edit unknown journal... return 'You must specify a journal name' if action in ['editDone', 'addDone']: # Save config if action == 'addDone': res = add_journal(journal_name, xml_config) if res == -1: msg = 'A journal with that name already exists. Please choose another name.' action = 'add' elif res == -2: msg = 'Configuration could not be written (no permission). Please manually copy your config to %s/webjournal/%s/%s-config.xml
    ' % (CFG_ETCDIR, journal_name, journal_name) action = 'edit' elif res == -4: msg = 'Cache file could not be written (no permission). Please manually create directory %s/webjournal/%s/ and make it writable for your Apache user
    ' % (CFG_CACHEDIR, journal_name) action = 'edit' elif res > 0: msg = 'Journal successfully added.' action = 'edit' else: msg = 'An error occurred. The journal could not be added' action = 'edit' if action == 'add': # Display a sample config. xml_config = ''' Atlantis Times %(CFG_SITE_URL)s /img/AtlantisTimes.css /img/AtlantisTimes.css AtlantisTimes_Index.bft AtlantisTimes_Detailed.bft AtlantisTimes_Search.bft AtlantisTimes_Popup.bft AtlantisTimes_Contact.bft News, 980__a:ATLANTISTIMESNEWS or 980__a:ATLANTISTIMESNEWSDRAFT Science, 980__a:ATLANTISTIMESSCIENCE or 980__a:ATLANTISTIMESSCIENCEDRAFT Arts, 980__a:ATLANTISTIMESARTS or 980__a:ATLANTISTIMESARTSDRAFT 2 52 all 773__n 773__c %(CFG_SITE_SUPPORT_EMAIL)s recipients@atlantis.atl en,fr DEMOJRN DEMOJRN_RN 02/2009 DRAFT ''' % {'CFG_SITE_URL': CFG_SITE_URL, 'CFG_SITE_SUPPORT_EMAIL': CFG_SITE_SUPPORT_EMAIL} out = wjt.tmpl_admin_configure_journal(ln=ln, journal_name=journal_name, xml_config=xml_config, action=action, msg=msg) return out ######################## ADDING/REMOVING JOURNALS ############################### def add_journal(journal_name, xml_config): """ Add a new journal to the DB. Also create the configuration file Parameters: journal_name - the name (used in URLs) of the new journal xml_config - the xml configuration of the journal (string) Returns: the id of the journal if successfully added -1 if could not be added because journal name already exists -2 if config could not be saved -3 if could not be added for other reasons -4 if database cache could not be added """ try: get_journal_id(journal_name) except InvenioWebJournalJournalIdNotFoundDBError: # Perfect, journal does not exist res = run_sql("INSERT INTO jrnJOURNAL (name) VALUES(%s)", (journal_name,)) # Also save xml_config config_dir = '%s/webjournal/%s/' % (CFG_ETCDIR, journal_name) try: if not os.path.exists(config_dir): os.makedirs(config_dir) xml_config_file = file(config_dir + journal_name + '-config.xml', 'w') xml_config_file.write(xml_config) xml_config_file.close() except Exception: res = -2 # And save some info in file in case database is down journal_info_path = get_journal_info_path(journal_name) journal_info_dir = os.path.dirname(journal_info_path) if not os.path.exists(journal_info_dir): try: os.makedirs(journal_info_dir) except Exception: if res <= 0: res = -4 journal_info_file = open(journal_info_path, 'w') cPickle.dump({'journal_id': res, 'journal_name': journal_name, 'current_issue':'01/2000'}, journal_info_file) return res return -1 def remove_journal(journal_name): """ Remove a journal from the DB. Does not completely remove everything, in case it was an error from the editor.. Parameters: journal_name - the journal to remove Returns: the id of the journal if successfully removed or -1 if could not be removed because journal name does not exist or -2 if could not be removed for other reasons """ run_sql("DELETE FROM jrnJOURNAL WHERE name=%s", (journal_name,)) ######################## TIME / ISSUE FUNCTIONS ############################### def release_journal_issue(publish_issues, journal_name, ln=CFG_SITE_LANG): """ Releases a new issue. 
This sets the current issue in the database to 'publish_issues' for given 'journal_name' Parameters: journal_name - the journal for which we release a new issue publish_issues - the list of issues that will be considered as current (there can be several) ln - language """ journal_id = get_journal_id(journal_name, ln) if len(publish_issues) > 1: publish_issues.sort(compare_issues) low_bound = publish_issues[0] high_bound = publish_issues[-1] issue_display = '%s-%s/%s' % (low_bound.split("/")[0], high_bound.split("/")[0], high_bound.split("/")[1]) # remember convention: if we are going over a new year, take the higher else: issue_display = publish_issues[0] # produce the DB lines for publish_issue in publish_issues: move_drafts_articles_to_ready(journal_name, publish_issue) run_sql("INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display) \ VALUES(%s, %s, %s)", (journal_id, publish_issue, issue_display)) # set first issue to published release_journal_update(publish_issues[0], journal_name, ln) # update information in file (in case DB is down) journal_info_path = get_journal_info_path(journal_name) journal_info_file = open(journal_info_path, 'w') cPickle.dump({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': get_current_issue(ln, journal_name)}, journal_info_file) def delete_journal_issue(issue, journal_name, ln=CFG_SITE_LANG): """ Deletes an issue from the DB. (Not currently used) """ journal_id = get_journal_id(journal_name, ln) run_sql("DELETE FROM jrnISSUE WHERE issue_number=%s \ AND id_jrnJOURNAL=%s",(issue, journal_id)) # update information in file (in case DB is down) journal_info_path = get_journal_info_path(journal_name) journal_info_file = open(journal_info_path, 'w') cPickle.dump({'journal_id': journal_id, 'journal_name': journal_name, 'current_issue': get_current_issue(ln, journal_name)}, journal_info_file) def was_alert_sent_for_issue(issue, journal_name, ln): """ Returns False if alert has not already been sent for given journal and issue, else returns time of last alert, as time tuple Parameters: journal_name - the journal for which we want to check last alert issue - the issue for which we want to check last alert ln - language Returns: time tuple or False. Eg: (2008, 4, 25, 7, 58, 37, 4, 116, -1) """ journal_id = get_journal_id(journal_name, ln) date_announced = run_sql("SELECT date_announced FROM jrnISSUE \ WHERE issue_number=%s \ AND id_jrnJOURNAL=%s", (issue, journal_id))[0][0] if date_announced == None: return False else: return date_announced.timetuple() def update_DB_for_alert(issue, journal_name, ln): """ Update the 'last sent alert' timestamp for the given journal and issue. Parameters: journal_name - the journal for which we want to update the time of last alert issue - the issue for which we want to update the time of last alert ln - language """ journal_id = get_journal_id(journal_name, ln) run_sql("UPDATE jrnISSUE set date_announced=NOW() \ WHERE issue_number=%s \ AND id_jrnJOURNAL=%s", (issue, journal_id)) def release_journal_update(update_issue, journal_name, ln=CFG_SITE_LANG): """ Releases an update to a journal. """ move_drafts_articles_to_ready(journal_name, update_issue) journal_id = get_journal_id(journal_name, ln) run_sql("UPDATE jrnISSUE set date_released=NOW() \ WHERE issue_number=%s \ AND id_jrnJOURNAL=%s", (update_issue, journal_id)) def move_drafts_articles_to_ready(journal_name, issue): """ Move draft articles to their final "collection". 
To do so we rely on the convention that an admin-chosen keyword must be removed from the metadata """ protected_datafields = ['100', '245', '246', '520', '590', '700'] keyword_to_remove = get_journal_draft_keyword_to_remove(journal_name) collections_to_refresh = {} indexes_to_refresh = get_journal_index_to_refresh_on_release(journal_name) bibindex_indexes_params = [] if indexes_to_refresh: bibindex_indexes_params = ['-w', ','.join(indexes_to_refresh)] categories = get_journal_categories(journal_name, issue) task_sequence_id = str(bibtask_allocate_sequenceid()) for category in categories: articles = get_journal_articles(journal_name, issue, category) for order, recids in articles.iteritems(): for recid in recids: record_xml = format_record(recid, of='xm') if not record_xml: continue new_record_xml_path = os.path.join(CFG_TMPSHAREDDIR, 'webjournal_publish_' + \ str(recid) + '.xml') if os.path.exists(new_record_xml_path): # Do not modify twice continue record_struc = create_record(record_xml) record = record_struc[0] new_record = update_draft_record_metadata(record, protected_datafields, keyword_to_remove) new_record_xml = print_rec(new_record) if new_record_xml.find(keyword_to_remove) >= 0: new_record_xml = new_record_xml.replace(keyword_to_remove, '') # Write to file new_record_xml_file = file(new_record_xml_path, 'w') new_record_xml_file.write(new_record_xml) new_record_xml_file.close() # Submit task_low_level_submission('bibupload', 'WebJournal', '-c', new_record_xml_path, '-I', task_sequence_id) task_low_level_submission('bibindex', 'WebJournal', '-i', str(recid), '-I', task_sequence_id, *bibindex_indexes_params) for collection in get_all_collections_of_a_record(recid): collections_to_refresh[collection] = '' # Refresh collections collections_to_refresh.update([(c, '') for c in get_journal_collection_to_refresh_on_release(journal_name)]) for collection in collections_to_refresh.keys(): task_low_level_submission('webcoll', 'WebJournal', '-f', '-P', '2', '-p', '1', '-c', collection, '-I', task_sequence_id) def update_draft_record_metadata(record, protected_datafields, keyword_to_remove): """ Returns a new record with fields that should be modified in order for this draft record to be considered as 'ready': keep only controlfield 001 and non-protected fields that contains the 'keyword_to_remove' Parameters: record - a single recored (as BibRecord structure) protected_datafields - *list* tags that should not be part of the returned record keyword_to_remove - *str* keyword that should be considered when checking if a field should be part of the returned record. """ new_record = {} for tag, field in record.iteritems(): if tag in protected_datafields: continue elif not keyword_to_remove in str(field) and \ not tag == '001': continue else: # Keep new_record[tag] = field return new_record ######################## XML CONFIG ############################### def can_read_xml_config(journal_name): """ Check that configuration xml for given journal name is exists and can be read. """ config_path = '%s/webjournal/%s/%s-config.xml' % \ (CFG_ETCDIR, journal_name, journal_name) try: file(config_path).read() except IOError: return False return True ######################## EMAIL HELPER FUNCTIONS ############################### def insert_journal_link(html_string, journal_name, issue, ln): """ Insert a warning regarding HTML formatting inside mail client and link to journal page just after the body of the page. 
@param html_string: the HTML newsletter @param journal_name: the journal name @param issue: journal issue for which the alert is sent (in the form number/year) @param ln: language """ def replace_body(match_obj): "Replace body with itself + header message" header = wjt.tmpl_admin_alert_header_html(journal_name, ln, issue) return match_obj.group() + header return re.sub('', replace_body, html_string, 1) def put_css_in_file(html_message, journal_name): """ Retrieve the CSS of the journal and insert/inline it in the section of the given html_message. (Used for HTML alert emails) Parameters: journal_name - the journal name html_message - the html message (string) in which the CSS should be inserted Returns: the HTML message with its CSS inlined """ css_path = get_journal_css_url(journal_name) if not css_path: return - css_file = urlopen(css_path) + css_file = WEBJOURNAL_OPENER.open(css_path) css = css_file.read() css = make_full_paths_in_css(css, journal_name) html_parted = html_message.split("") if len(html_parted) > 1: html = '%s%s' % (html_parted[0], css, html_parted[1]) else: html_parted = html_message.split("") if len(html_parted) > 1: html = '%s%s' % (html_parted[0], css, html_parted[1]) else: return return html def make_full_paths_in_css(css, journal_name): """ Update the URLs in a CSS from relative to absolute URLs, so that the URLs are accessible from anywhere (Used for HTML alert emails) Parameters: journal_name - the journal name css - a cascading stylesheet (string) Returns: (str) the given css with relative paths converted to absolute paths """ url_pattern = re.compile('''url\(["']?\s*(?P\S*)\s*["']?\)''', re.DOTALL) url_iter = url_pattern.finditer(css) rel_to_full_path = {} for url in url_iter: url_string = url.group("url") url_string = url_string.replace('"', "") url_string = url_string.replace("'", "") if url_string[:6] != "http://": rel_to_full_path[url_string] = '"%s/img/webjournal_%s/%s"' % \ (CFG_SITE_URL, journal_name, url_string) for url in rel_to_full_path.keys(): css = css.replace(url, rel_to_full_path[url]) return css diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py index e7c4c95d4..7a2a4d040 100644 --- a/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py +++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py @@ -1,249 +1,248 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
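# The recurring change in this patch replaces bare urllib2.urlopen() calls
# with WEBJOURNAL_OPENER.open(), a shared opener imported from
# invenio.webjournal_utils (built via make_invenio_opener from
# invenio.urlutils). A minimal sketch of such an opener follows, assuming it
# mainly sets an identifying User-Agent header; the builder body and header
# value here are illustrative assumptions, not the actual implementation.

import urllib2

def make_invenio_opener_sketch(component='WebJournal'):
    """Build a urllib2 opener whose requests identify the Invenio component."""
    opener = urllib2.build_opener()
    opener.addheaders = [('User-Agent', 'Invenio %s' % component)]
    return opener

# Usage then mirrors the patched call sites:
#   opener = make_invenio_opener_sketch()
#   html_string = opener.open(url).read()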
""" WebJournal widget - Display Indico seminars """ from invenio.config import CFG_CACHEDIR, CFG_SITE_LANG -from urllib2 import urlopen from xml.dom import minidom -from invenio.urlutils import create_Indico_request_url +from invenio.urlutils import create_Indico_request_url, make_invenio_opener import time import base64 import socket from invenio.webjournal_utils import \ - parse_url_string + parse_url_string, WEBJOURNAL_OPENER from invenio.messages import gettext_set_language update_frequency = 3600 # in seconds def format_element(bfo, indico_baseurl="https://indico.cern.ch", indico_what='categ', indico_loc="", indico_id="1l7", indico_key="", indico_sig="", indico_onlypublic='yes', indico_from="today", indico_to='today', indico_credential_path=""): """ Display the list of seminar from the given Indico instance See Indico HTTP Export APIs: http://indico.cern.ch/ihelp/html/ExportAPI/index.html @param indico_baseurl: Indico base URL from which to retrieve information @param indico_what: element to export @type indico_what: one of the strings: C{categ}, C{event}, C{room}, C{reservation} @param indico_loc: location of the element(s) specified by ID (only used for some elements) @param indico_id: ID of the element to be exported @type indico_id: a string or a list/tuple of strings @param indico_type: output format @type indico_type: one of the strings: C{json}, C{jsonp}, C{xml}, C{html}, C{ics}, C{atom} @param indico_params: parameters of the query. See U{http://indico.cern.ch/ihelp/html/ExportAPI/common.html} @param indico_key: API key provided for the given Indico instance @param indico_sig: API secret key (signature) provided for the given Indico instance @param indico_credential_path: if provided, load 'indico_key' and 'indico_sig' from this path """ args = parse_url_string(bfo.user_info['uri']) journal_name = args["journal_name"] cached_filename = "webjournal_widget_seminars_%s.xml" % journal_name out = get_widget_html(bfo, indico_baseurl, indico_what, indico_loc, indico_id, indico_onlypublic, indico_from, indico_to, indico_key, indico_sig, indico_credential_path, cached_filename, bfo.lang) return out def escape_values(bfo): """ Called by BibFormat in order to check if output of this element should be escaped. """ return 0 def get_widget_html(bfo, indico_baseurl, indico_what, indico_loc, indico_id, indico_onlypublic, indico_from, indico_to, indico_key, indico_sig, indico_credential_path, cached_filename, ln=CFG_SITE_LANG): """ Indico seminars of the day service Gets seminars of the day from CERN Indico every 60 minutes and displays them in a widget. """ _ = gettext_set_language(ln) try: seminar_xml = minidom.parse('%s/%s' % (CFG_CACHEDIR, cached_filename)) except: try: _update_seminars(indico_baseurl, indico_what, indico_loc, indico_id, indico_onlypublic, indico_from, indico_to, indico_key, indico_sig, indico_credential_path, cached_filename) seminar_xml = minidom.parse('%s/%s' % (CFG_CACHEDIR, cached_filename)) except: return "
    • " + _("No information available") + "
    " try: timestamp = seminar_xml.firstChild.getAttribute("time") except: timestamp = time.struct_time() last_update = time.mktime(time.strptime(timestamp, "%a, %d %b %Y %H:%M:%S %Z")) now = time.mktime(time.gmtime()) if last_update + update_frequency < now: try: _update_seminars(indico_baseurl, indico_what, indico_loc, indico_id, indico_onlypublic, indico_from, indico_to, indico_key, indico_sig, indico_credential_path, cached_filename) seminar_xml = minidom.parse('%s/%s' % (CFG_CACHEDIR, cached_filename)) except: return "
    • " + _("No information available") + "
    " html = "" seminars = seminar_xml.getElementsByTagName("seminar") if len(seminars) == 0: return "
    • " + _("No seminars today") + "
    " for seminar in seminars: html += "
  • " try: seminar_time = seminar.getElementsByTagName("start_time")[0].firstChild.toxml(encoding="utf-8") except: seminar_time = "" try: category = seminar.getElementsByTagName("category")[0].firstChild.toxml(encoding="utf-8") except: category = "Seminar" html += '%s %s
    ' % (seminar_time, category) try: title = seminar.getElementsByTagName("title")[0].firstChild.toxml(encoding="utf-8") except: title = "" try: url = seminar.getElementsByTagName("url")[0].firstChild.toxml(encoding="utf-8") except: url = "#" try: speaker = seminar.getElementsByTagName("speaker")[0].firstChild.toxml(encoding="utf-8") except: speaker = "" if (title != ""): html += '%s, %s
    ' % (url, title, speaker) try: location = seminar.getElementsByTagName("location")[0].firstChild.toxml(encoding="utf-8") + ' ' except: location = "" html += location try: room = seminar.getElementsByTagName("room")[0].firstChild.toxml(encoding="utf-8") except: room = "" html += room html += "
  • " html = '
      ' + html + '
    ' return html def _update_seminars(indico_baseurl, indico_what, indico_loc, indico_id, indico_onlypublic, indico_from, indico_to, indico_key, indico_sig, indico_credential_path, cached_filename): """ helper function that gets the xml data source from CERN Indico and creates a dedicated xml file in the cache for easy use in the widget. """ if indico_credential_path: indico_key, indico_sig = get_indico_credentials(indico_credential_path) url = create_Indico_request_url(indico_baseurl, indico_what, indico_loc, indico_id, 'xml', {'onlypublic': indico_onlypublic, 'from': indico_from, 'to': indico_to}, indico_key, indico_sig) default_timeout = socket.getdefaulttimeout() socket.setdefaulttimeout(2.0) try: try: - indico_xml = urlopen(url) + indico_xml = WEBJOURNAL_OPENER.open(url) except: return finally: socket.setdefaulttimeout(default_timeout) xml_file_handler = minidom.parseString(indico_xml.read()) seminar_xml = ['' % time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()), ] agenda_items = xml_file_handler.getElementsByTagName("conference") for item in agenda_items: seminar_xml.extend(["", ]) try: start_date = item.getElementsByTagName("startDate")[0].firstChild.toxml(encoding="utf-8") start_time = start_date[11:16] except: start_time = "" seminar_xml.extend(["%s" % start_time, ]) try: category = item.getElementsByTagName("category")[0].firstChild.toxml(encoding="utf-8") category = category.split("/")[-1] category = category.replace("&", "") category = category.replace("nbsp;", "") category = category.replace(" ", "") except: category = "" seminar_xml.extend(["%s" % category, ]) try: title = item.getElementsByTagName("title")[0].firstChild.toxml(encoding="utf-8") except: title = "" seminar_xml.extend(["%s" % title, ]) try: url = item.getElementsByTagName("url")[0].firstChild.toxml(encoding="utf-8") except: url = "#" seminar_xml.extend(["%s" % url, ]) try: speaker = item.getElementsByTagName("fullName")[0].firstChild.toxml(encoding="utf-8") except: speaker = "" seminar_xml.extend(["%s" % speaker, ]) try: room = item.getElementsByTagName("room")[0].firstChild.toxml(encoding="utf-8") except: room = "" seminar_xml.extend(["%s" % room, ]) try: location = item.getElementsByTagName("location")[0].firstChild.toxml(encoding="utf-8") except: location = "" seminar_xml.extend(["%s" % location, ]) seminar_xml.extend(["", ]) seminar_xml.extend(["", ]) # write the created file to cache fptr = open("%s/%s" % (CFG_CACHEDIR, cached_filename), "w") fptr.write("\n".join(seminar_xml)) fptr.close() def get_indico_credentials(path): """ Returns the Indico API key and (secret) signature as a tuple (public_key, private_key). """ try: fd = open(path, "r") _indico_credentials = fd.read() fd.close() except IOError, e: return ('', '') return base64.decodestring(_indico_credentials).split('\n', 1) _ = gettext_set_language('en') dummy = _("What's on today") dummy = _("Seminars of the week") diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py index 801d0758d..a62e90225 100644 --- a/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py +++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py @@ -1,225 +1,224 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ WebJournal widget - Display weather forecast """ import os import time import re import socket -from urllib2 import urlopen try: # Try to load feedparser. Remember for later if it was installed # or not. Note that feedparser is slow to load: if we don't load # it in a 'global' way, it will be loaded for every call to this # element. global feedparser import feedparser feedparser_available = 1 except ImportError: feedparser_available = 0 from invenio.config import \ CFG_CACHEDIR, \ CFG_ACCESS_CONTROL_LEVEL_SITE from invenio.errorlib import register_exception from invenio.webjournal_utils import \ - parse_url_string + parse_url_string, WEBJOURNAL_OPENER from invenio.messages import gettext_set_language re_image_pattern = re.compile(r'\S*)"\s*/>', re.DOTALL | re.IGNORECASE | re.VERBOSE) yahoo_weather_rss_base_url = 'http://weather.yahooapis.com/forecastrss?w=%(location)s&u=%(degree_unit)s' def format_element(bfo, location='782041', degree_unit='c' , display_weather_icon='false', weather_icon_only='false'): """ Display the latest weather forecast from Yahoo Weather (See http://developer.yahoo.com/weather/) @param location: Yahoo location code for the forecast @param degree_unit: Degree unit ('f'=Fahrenheit or 'c'=Celsius) @param display_weather_icon: if 'true', display weather icon inside the forecasts @param weather_icon_only: it 'true' display only the wheater icon (without text) """ if not feedparser_available: return "" args = parse_url_string(bfo.user_info['uri']) journal_name = args["journal_name"] cached_filename = "webjournal_widget_weather_%s.rss" % journal_name expire_time_filename = "webjournal_widget_weather_%s_RSS_expires" % \ journal_name out = get_widget_html(yahoo_weather_rss_base_url % \ {'location': location, 'degree_unit': degree_unit}, cached_filename, expire_time_filename, journal_name) if weather_icon_only == 'true': try: out = '' % \ re_image_pattern.findall(out)[0][1] except: register_exception(req=bfo.req) out = '' elif display_weather_icon == 'false': try: out = re.sub(re_image_pattern, "", out) except: register_exception(req=bfo.req) out = '' return out def escape_values(bfo): """ Called by BibFormat in order to check if output of this element should be escaped. """ return 0 def get_widget_html(yahoo_weather_rss, cached_filename, expire_time_filename, journal_name): """ weather forecast using Yahoo! Weather service we check and store the "expires" data from the rss feed to decide when an update is needed. there always resides a cached version in cds CFG_CACHEDIR along with a flat file that indicates the time when the feed expires. """ cached_weather_box = _get_weather_from_cache(journal_name) if cached_weather_box: return cached_weather_box # No HTML cache? Then read locally saved feed data, and even # refresh it from Yahoo if it has expired. 
try: cached_rss_path = os.path.join(CFG_CACHEDIR, cached_filename) assert(os.path.exists(cached_rss_path)) weather_feed = feedparser.parse(cached_rss_path) assert(not weather_feed.bozo_exception) except: try: _update_feed(yahoo_weather_rss, cached_filename, expire_time_filename) weather_feed = feedparser.parse('%s/%s' % \ (CFG_CACHEDIR, cached_filename)) except: return "
    • " + _("No information available") + "
    " now_in_gmt = time.gmtime() try: expire_time = time.strptime(open(expire_time_filename).read(), "%a, %d %b %Y %H:%M:%S %Z") diff = time.mktime(expire_time) - time.mktime(now_in_gmt) except: diff = -1 if diff < 0: try: _update_feed(yahoo_weather_rss, cached_filename, expire_time_filename) weather_feed = feedparser.parse('%s/%s' % \ (CFG_CACHEDIR, cached_filename)) except: return "
    • " + _("No information available") + "
    " # Construct the HTML. Well, simply take the one provided by # Yahoo.. html = weather_feed.entries[0]['summary'] cache_weather(html, journal_name) return html def _get_weather_from_cache(journal_name): """ Try to get the weather information from cache. Return False if cache does not exist """ cache_path = os.path.abspath('%s/webjournal/%s/weather.html' % \ (CFG_CACHEDIR, journal_name)) if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Make sure we are reading from correct directory (you # know, in case there are '../../' inside journal name..) return False try: last_update = os.path.getctime(cache_path) except: return False now = time.time() if (last_update + 15*60) < now: # invalidate after 15 minutes return False try: cached_file = open(cache_path).read() except: return False return cached_file def cache_weather(html, journal_name): """ Caches the weather box for 30 minutes. """ if not CFG_ACCESS_CONTROL_LEVEL_SITE == 2: cache_path = os.path.abspath('%s/webjournal/%s/weather.html' % \ (CFG_CACHEDIR, journal_name)) if cache_path.startswith(CFG_CACHEDIR + '/webjournal'): # Do not try to cache if the journal name led us to some # other directory ('../../' inside journal name for # example) cache_dir = CFG_CACHEDIR + '/webjournal/' + journal_name if not os.path.isdir(cache_dir): os.makedirs(cache_dir) cache_file = file(cache_path, "w") cache_file.write(html) cache_file.close() def _update_feed(yahoo_weather_rss, cached_filename, expire_time_filename): """ Retrieve the latest weather information from Yahoo and write it to 'cached_filename'. Also write the supposed expiration date provided by Yahoo to 'expire_time_filename'. """ default_timeout = socket.getdefaulttimeout() socket.setdefaulttimeout(2.0) try: try: - feed = urlopen(yahoo_weather_rss) + feed = WEBJOURNAL_OPENER.open(yahoo_weather_rss) except: return finally: socket.setdefaulttimeout(default_timeout) cached_file = open('%s/%s' % (CFG_CACHEDIR, cached_filename), 'w') cached_file.write(feed.read()) cached_file.close() feed_data = feedparser.parse(yahoo_weather_rss) expire_time = feed_data.headers['expires'] expire_file = open('%s/%s' % (CFG_CACHEDIR, expire_time_filename), 'w') expire_file.write(expire_time) expire_file.close() _ = gettext_set_language('en') dummy = _("Under the CERN sky") diff --git a/modules/websearch/lib/websearch_external_collections_getter.py b/modules/websearch/lib/websearch_external_collections_getter.py index 38303f328..b158788fb 100644 --- a/modules/websearch/lib/websearch_external_collections_getter.py +++ b/modules/websearch/lib/websearch_external_collections_getter.py @@ -1,246 +1,246 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Module to download web pages using asyncore. 
Example 1, downloading a set of webpages : from websearch_external_collections_getter import * urls = ['http://www.google.fr', 'http://linuxfr.org'] pagegetters = [HTTPAsyncPageGetter(url) for url in urls] async_download(pagegetters) for pagegetter in pagegetters: if pagegetter.done: print pagegetter.data else: print "Error downloading : " + pagegetter.uri Example 2, downloading a set of webpages but with callback function. def func(pagegetter, data, current_time): print "OK (%f): " % current_time + pagegetter.uri + " - " + data from websearch_external_collections_getter import * urls = ['http://www.google.fr', 'http://linuxfr.org'] pagegetters = [HTTPAsyncPageGetter(url) for url in urls] async_download(pagegetters, func, ['info1', 'info2'], 10) """ __revision__ = "$Id$" import asyncore import mimetools import socket import sys import StringIO import time import urlparse #from invenio.websearch_external_collections_config import CFG_EXTERNAL_COLLECTION_TIMEOUT from invenio.config import CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT CFG_EXTERNAL_COLLECTION_TIMEOUT = CFG_WEBSEARCH_EXTERNAL_COLLECTION_SEARCH_TIMEOUT def async_download(pagegetter_list, finish_function=None, datastructure_list=None, timeout=15, print_search_info=True, print_body=True): """Download web pages asynchronously with timeout. pagegetter_list : list of HTTPAsyncPageGetter objects finish_function : function called when a web page is downloaded; prototype def funct(pagetter, datastructure, current_time, print_search_info(optional)) datastructure_list : list (same size as pagegetter_list) with information to pass as datastructure to the finish function. timeout : float, timeout in seconds. print_search_info: boolean, whether to print the search info or not in the finish function""" time_start = time.time() finished_list = [False] * len(pagegetter_list) nb_remaining = 0 check_redirected(pagegetter_list) for pagegetter in pagegetter_list: if pagegetter and not pagegetter.done: nb_remaining += 1 while (time.time() - time_start < timeout) and nb_remaining > 0: if sys.hexversion < 0x2040000: asyncore.poll(0.01) else: asyncore.loop(0.01, True, None, 1) check_redirected(pagegetter_list) for i in range(len(pagegetter_list)): if pagegetter_list[i] and not finished_list[i] and pagegetter_list[i].done: nb_remaining -= 1 if finish_function: if datastructure_list: datastructure = datastructure_list[i] else: datastructure = None current_time = time.time() - time_start try: finish_function(pagegetter_list[i], datastructure, current_time, print_search_info, print_body) except TypeError: finish_function(pagegetter_list[i], datastructure, current_time) finished_list[i] = True return finished_list class HTTPAsyncPageGetter(asyncore.dispatcher_with_send): """Class to download a web page using asyncore.""" def __init__(self, uri): asyncore.dispatcher_with_send.__init__(self) self.uri = uri self.redirected = None self.status = None self.header = None self.done = False self.data = "" self.header_data = "" self.create_socket(socket.AF_INET, socket.SOCK_STREAM) - self.request, self.host, self.port = build_request(self.uri) + self.request, self.host, self.port = build_rest_request(self.uri) try: self.connect((self.host, self.port)) except: self.done = True def handle_connect(self): """Handle the connection event. By sending the request to the server.""" try: self.send(self.request) except socket.error: # do nothing because self.done is false by default pass def handle_expt(self): """Handle an exception. 
Close the socket and put done at True.""" self.close() self.done = True def handle_read(self): """Handle a read event.""" data = self.recv(1024) if not self.header: self.header_data += data (self.status, self.header, data) = decode_header(self.header_data) if self.status is not None: if self.status[1] in ("301", "302"): self.redirected = self.header["location"] self.data += data def handle_close(self): """Handle a close event.""" self.done = True self.close() def log_info(self, message, type='info'): """ Workaround broken asyncore log_info method that tries to print to stdout. """ print >> sys.stderr, "%s: %s" % (type, message) -def build_request(uri): +def build_rest_request(uri): """Build an http request for a specific url.""" scheme, host, path, params, query, dummy = urlparse.urlparse(uri) assert scheme == "http", "only supports HTTP requests (uri = " + uri + ")" host, port = decode_host_port(host) path = encode_path(path, params, query) request = "GET %s HTTP/1.0\r\n" % (path) + \ "User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/48 (like Gecko) Safari/48\r\n" + \ "Accept: text/html, image/jpeg, image/png, text/*, image/*, */*\r\n" + \ "Accept-Charset: utf-8, utf-8;q=0.5, *;q=0.5\r\n" + \ "Host: %s\r\n" % (host) + \ "Connection: close\r\n\r\n" return (request, host, port) def decode_host_port(host): """Decode the host string in an (host, port) pair.""" try: host, port = host.split(":", 1) port = int(port) except (TypeError, ValueError): port = 80 return (host, port) def encode_path(path, params, query): """Bind the path, the params and the query in a unique http path.""" if not path: path = "/" if params: path = path + ";" + params if query: path = path + "?" + query return path def decode_header(data): """Try to decode an html header. If the header can be decoded, will return (status, header, remaining_data) If it cannot, (None, None, data) """ i = data.find("\r\n\r\n") size = 4 if i == -1: i = data.find("\n\n") size = 2 if i == -1: return (None, None, data) # parse header header_fp = StringIO.StringIO(data[:i+size]) # status line is "HTTP/version status message" status = header_fp.readline() status = status.split(" ", 2) # followed by a rfc822-style message header header = mimetools.Message(header_fp) # followed by a newline, and the payload (if any) data = data[i+size:] return (status, header, data) def check_redirected(pagegetter_list): """Check if a redirection occured in the engines_list.""" for i in range(len(pagegetter_list)): getter = pagegetter_list[i] if getter and getter.redirected is not None: if getter.redirected.startswith('http://'): getter = HTTPAsyncPageGetter(getter.redirected) else: getter.done = True pagegetter_list[i] = getter def fetch_url_content(urls, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT): """Given a list of urls this function returns a list of their contents using a optional custom timeout.""" urls_content = [] try: pagegetters_list = [HTTPAsyncPageGetter(url) for url in urls] except AssertionError: return [None] * len(urls) async_download(pagegetters_list, None, None, timeout) for i in range(len(pagegetters_list)): if pagegetters_list[i].done: urls_content.append(pagegetters_list[i].data) else: urls_content.append(None) return urls_content diff --git a/modules/websession/lib/Makefile.am b/modules/websession/lib/Makefile.am index 990dc792b..4684028a0 100644 --- a/modules/websession/lib/Makefile.am +++ b/modules/websession/lib/Makefile.am @@ -1,32 +1,32 @@ ## This file is part of Invenio. 
## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir = $(libdir)/python/invenio pylib_DATA = session.py webuser.py webuser_tests.py \ websession_templates.py websession_webinterface.py \ webgroup.py webgroup_dblayer.py websession_config.py \ webaccount.py websession_regression_tests.py \ webgroup_regression_tests.py webuser_regression_tests.py \ - webgroup_tests.py inveniogc.py \ + webgroup_tests.py inveniogc.py webuser_config.py \ websession_web_tests.py noinst_DATA = password_migration_kit.py EXTRA_DIST = $(pylib_DATA) $(noinst_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/websession/lib/webaccount.py b/modules/websession/lib/webaccount.py index ca5b94342..98df64c83 100644 --- a/modules/websession/lib/webaccount.py +++ b/modules/websession/lib/webaccount.py @@ -1,426 +1,435 @@ ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
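# A short usage sketch for the asyncore-based getter patched above
# (websearch_external_collections_getter): fetch_url_content() returns one
# entry per URL, with None marking downloads that failed or timed out. The
# URLs below are placeholders, not endpoints used by Invenio.

from invenio.websearch_external_collections_getter import fetch_url_content

urls = ['http://www.example.org', 'http://www.example.net']
for url, content in zip(urls, fetch_url_content(urls, timeout=5)):
    if content is None:
        print "Error downloading: " + url
    else:
        print "Fetched %d bytes from %s" % (len(content), url)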
__revision__ = "$Id$" import re import MySQLdb import urllib from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_CERN_SITE, \ CFG_SITE_LANG, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_ADMIN_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_VERSION, \ CFG_DATABASE_HOST, \ CFG_DATABASE_NAME, \ CFG_SITE_RECORD from invenio.access_control_engine import acc_authorize_action from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION, \ SUPERADMINROLE, CFG_EXTERNAL_AUTH_DEFAULT from invenio.dbquery import run_sql from invenio.webuser import getUid, get_user_preferences, \ collect_user_info from invenio.access_control_admin import acc_find_user_role_actions from invenio.messages import gettext_set_language from invenio.external_authentication import InvenioWebAccessExternalAuthError import invenio.template websession_templates = invenio.template.load('websession') +from invenio import web_api_key + def perform_info(req, ln): """Display the main features of CDS personalize""" uid = getUid(req) user_info = collect_user_info(req) return websession_templates.tmpl_account_info( ln = ln, uid = uid, guest = int(user_info['guest']), CFG_CERN_SITE = CFG_CERN_SITE, ) def perform_display_external_user_settings(settings, ln): """show external user settings which is a dictionary.""" _ = gettext_set_language(ln) html_settings = "" print_settings = False settings_keys = settings.keys() settings_keys.sort() for key in settings_keys: value = settings[key] if key.startswith("EXTERNAL_") and not "HIDDEN_" in key: print_settings = True key = key[9:].capitalize() html_settings += websession_templates.tmpl_external_setting(ln, key, value) return print_settings and websession_templates.tmpl_external_user_settings(ln, html_settings) or "" def perform_youradminactivities(user_info, ln): """Return text for the `Your Admin Activities' box. Analyze whether user UID has some admin roles, and if yes, then print suitable links for the actions he can do. If he's not admin, print a simple non-authorized message.""" your_role_actions = acc_find_user_role_actions(user_info) your_roles = [] your_admin_activities = [] guest = int(user_info['guest']) for (role, action) in your_role_actions: if role not in your_roles: your_roles.append(role) if action not in your_admin_activities: your_admin_activities.append(action) if SUPERADMINROLE in your_roles: for action in ("runbibedit", "cfgbibformat", "cfgbibharvest", "cfgoairepository", "cfgbibrank", "cfgbibindex", "cfgwebaccess", "cfgwebcomment", "cfgwebsearch", "cfgwebsubmiit", "cfgbibknowledge", "runbatchuploader"): if action not in your_admin_activities: your_admin_activities.append(action) return websession_templates.tmpl_account_adminactivities( ln = ln, uid = user_info['uid'], guest = guest, roles = your_roles, activities = your_admin_activities, ) def perform_display_account(req, username, bask, aler, sear, msgs, loan, grps, sbms, appr, admn, ln): """Display a dynamic page that shows the user's account.""" # load the right message language _ = gettext_set_language(ln) uid = getUid(req) user_info = collect_user_info(req) #your account if int(user_info['guest']): user = "guest" login = "%s/youraccount/login?ln=%s" % (CFG_SITE_SECURE_URL, ln) accBody = _("You are logged in as guest. You may want to %(x_url_open)slogin%(x_url_close)s as a regular user.") %\ {'x_url_open': '', 'x_url_close': ''} accBody += "

    " bask=aler=msgs= _("The %(x_fmt_open)sguest%(x_fmt_close)s users need to %(x_url_open)sregister%(x_url_close)s first") %\ {'x_fmt_open': '', 'x_fmt_close': '', 'x_url_open': '', 'x_url_close': ''} sear= _("No queries found") else: user = username accBody = websession_templates.tmpl_account_body( ln = ln, user = user, ) #Display warnings if user is superuser roles = acc_find_user_role_actions(user_info) warnings = "0" for role in roles: if "superadmin" in role: warnings = "1" break warning_list = superuser_account_warnings() #check if tickets ok tickets = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) return websession_templates.tmpl_account_page( ln = ln, warnings = warnings, warning_list = warning_list, accBody = accBody, baskets = bask, alerts = aler, searches = sear, messages = msgs, loans = loan, groups = grps, submissions = sbms, approvals = appr, tickets = tickets, administrative = admn ) def superuser_account_warnings(): """Check to see whether admin accounts have default / blank password etc. Returns a list""" warning_array = [] #Try and connect to the mysql database with the default invenio password try: conn = MySQLdb.connect (host = CFG_DATABASE_HOST, user = "root", passwd = "my123p$ss", db = "mysql") conn.close() warning_array.append("warning_mysql_password_equal_to_invenio_password") except: pass #Try and connect to the invenio database with the default invenio password try: conn = MySQLdb.connect (host = CFG_DATABASE_HOST, user = "invenio", passwd = "my123p$ss", db = CFG_DATABASE_NAME) conn.close () warning_array.append("warning_invenio_password_equal_to_default") except: pass #Check if the admin password is empty res = run_sql("SELECT password, email from user where nickname = 'admin'") if res: res1 = run_sql("SELECT email from user where nickname = 'admin' and password = AES_ENCRYPT(%s,'')", (res[0][1], )) else: # no account nick-named `admin' exists; keep on going res1 = [] for user in res1: warning_array.append("warning_empty_admin_password") #Check if the admin email has been changed from the default if (CFG_SITE_ADMIN_EMAIL == "info@invenio-software.org" or CFG_SITE_SUPPORT_EMAIL == "info@invenio-software.org") and CFG_CERN_SITE == 0: warning_array.append("warning_site_support_email_equal_to_default") #Check for a new release of Invenio try: find = re.compile('Invenio v[0-9]+.[0-9]+.[0-9]+(\-rc[0-9])? 
is released') webFile = urllib.urlopen("http://invenio-software.org/repo/invenio/tree/RELEASE-NOTES") temp = "" version = "" version1 = "" while 1: temp = webFile.readline() match1 = find.match(temp) try: version = match1.group() break except: pass if not temp: break webFile.close() submatch = re.compile('[0-9]+.[0-9]+.[0-9]+(\-rc[0-9])?') version1 = submatch.search(version) web_version = version1.group().split(".") local_version = CFG_VERSION.split(".") if web_version[0] > local_version[0]: warning_array.append("note_new_release_available") elif web_version[0] == local_version[0] and web_version[1] > local_version[1]: warning_array.append("note_new_release_available") elif web_version[0] == local_version[0] and web_version[1] == local_version[1] and web_version[2] > local_version[2]: warning_array.append("note_new_release_available") except: warning_array.append("error_cannot_download_release_notes") return warning_array def template_account(title, body, ln): """It is a template for print each of the options from the user's account.""" return websession_templates.tmpl_account_template( ln = ln, title = title, body = body ) def warning_guest_user(type, ln=CFG_SITE_LANG): """It returns an alert message,showing that the user is a guest user and should log into the system.""" # load the right message language _ = gettext_set_language(ln) return websession_templates.tmpl_warning_guest_user( ln = ln, type = type, ) def perform_delete(ln): """Delete the account of the user, not implement yet.""" # TODO return websession_templates.tmpl_account_delete(ln = ln) def perform_set(email, ln, can_config_bibcatalog = False, verbose = 0): """Perform_set(email,password): edit your account parameters, email and password. If can_config_bibcatalog is True, show the bibcatalog dialog (if configured). """ try: res = run_sql("SELECT id, nickname FROM user WHERE email=%s", (email,)) uid = res[0][0] nickname = res[0][1] except: uid = 0 nickname = "" CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS_LOCAL = CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS prefs = get_user_preferences(uid) if CFG_EXTERNAL_AUTHENTICATION.has_key(prefs['login_method']) and CFG_EXTERNAL_AUTHENTICATION[prefs['login_method']] is not None: CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS_LOCAL = 3 out = websession_templates.tmpl_user_preferences( ln = ln, email = email, email_disabled = (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS_LOCAL >= 2), password_disabled = (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS_LOCAL >= 3), nickname = nickname, ) if len(CFG_EXTERNAL_AUTHENTICATION) > 1: try: uid = run_sql("SELECT id FROM user where email=%s", (email,)) uid = uid[0][0] except: uid = 0 current_login_method = prefs['login_method'] methods = CFG_EXTERNAL_AUTHENTICATION.keys() # Filtering out methods that don't provide user_exists to check if # a user exists in the external auth method before letting him/her # to switch. 
for method in methods: if CFG_EXTERNAL_AUTHENTICATION[method] is not None: try: if not CFG_EXTERNAL_AUTHENTICATION[method].user_exists(email): methods.remove(method) except (AttributeError, InvenioWebAccessExternalAuthError, NotImplementedError): methods.remove(method) methods.sort() if len(methods) > 1: out += websession_templates.tmpl_user_external_auth( ln = ln, methods = methods, current = current_login_method, method_disabled = (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 4) ) current_group_records = prefs.get('websearch_group_records', 10) show_latestbox = prefs.get('websearch_latestbox', True) show_helpbox = prefs.get('websearch_helpbox', True) out += websession_templates.tmpl_user_websearch_edit( ln = ln, current = current_group_records, show_latestbox = show_latestbox, show_helpbox = show_helpbox, ) preferred_lang = prefs.get('language', ln) out += websession_templates.tmpl_user_lang_edit( ln = ln, preferred_lang = preferred_lang ) + + keys_info = web_api_key.show_web_api_keys(uid=uid) + out+=websession_templates.tmpl_user_api_key( + ln = ln, + keys_info = keys_info + ) + #show this dialog only if the system has been configured to use a ticket system from invenio.config import CFG_BIBCATALOG_SYSTEM if CFG_BIBCATALOG_SYSTEM and can_config_bibcatalog: bibcatalog_username = prefs.get('bibcatalog_username', "") bibcatalog_password = prefs.get('bibcatalog_password', "") out += websession_templates.tmpl_user_bibcatalog_auth(bibcatalog_username, \ bibcatalog_password, ln=ln) if verbose >= 9: for key, value in prefs.items(): out += "%s:%s
    " % (key, value) out += perform_display_external_user_settings(prefs, ln) return out def create_register_page_box(referer='', ln=CFG_SITE_LANG): """Register a new account.""" return websession_templates.tmpl_register_page( referer = referer, ln = ln, level = CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, ) ## create_login_page_box(): ask for the user's email and password, for login into the system def create_login_page_box(referer='', ln=CFG_SITE_LANG): # List of referer regexep and message to print _ = gettext_set_language(ln) login_referrer2msg = ( (re.compile(r"/search"), "

    " + _("This collection is restricted. If you think you have right to access it, please authenticate yourself.") + "

    "), (re.compile(r"/%s/\d+/files/.+" % CFG_SITE_RECORD), "

    " + _("This file is restricted. If you think you have right to access it, please authenticate yourself.") + "

    "), ) msg = "" for regexp, txt in login_referrer2msg: if regexp.search(referer): msg = txt break internal = None for system in CFG_EXTERNAL_AUTHENTICATION.keys(): if CFG_EXTERNAL_AUTHENTICATION[system] is None: internal = system break register_available = CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS <= 1 and internal ## Let's retrieve all the login method that are not dedicated to robots methods = [method[0] for method in CFG_EXTERNAL_AUTHENTICATION.iteritems() if not method[1] or not method[1].robot_login_method_p()] methods.sort() return websession_templates.tmpl_login_form( ln = ln, referer = referer, internal = internal, register_available = register_available, methods = methods, selected_method = CFG_EXTERNAL_AUTH_DEFAULT, msg = msg, ) # perform_logout: display the message of not longer authorized, def perform_logout(req, ln): return websession_templates.tmpl_account_logout(ln = ln) #def perform_lost: ask the user for his email, in order to send him the lost password def perform_lost(ln): return websession_templates.tmpl_lost_password_form(ln) #def perform_reset_password: ask the user for a new password to reset the lost one def perform_reset_password(ln, email, reset_key, msg=''): return websession_templates.tmpl_reset_password_form(ln, email, reset_key, msg) # perform_emailSent(email): confirm that the password has been emailed to 'email' address def perform_emailSent(email, ln): return websession_templates.tmpl_account_emailSent(ln = ln, email = email) # peform_emailMessage : display a error message when the email introduced is not correct, and sugest to try again def perform_emailMessage(eMsg, ln): return websession_templates.tmpl_account_emailMessage( ln = ln, msg = eMsg ) # perform_back(): template for return to a previous page, used for login,register and setting def perform_back(mess, url, linkname, ln='en'): return websession_templates.tmpl_back_form( ln = ln, message = mess, url = url, link = linkname, ) diff --git a/modules/websession/lib/websession_templates.py b/modules/websession/lib/websession_templates.py index a9941f46c..19789fa1c 100644 --- a/modules/websession/lib/websession_templates.py +++ b/modules/websession/lib/websession_templates.py @@ -1,2474 +1,2568 @@ ## This file is part of Invenio. ## Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
__revision__ = "$Id$" import urllib import cgi from invenio.config import \ CFG_CERN_SITE, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_SITE_URL, \ CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS, \ CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS, \ CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS, \ CFG_WEBSEARCH_MAX_RECORDS_IN_GROUPS, \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_SITE_RECORD from invenio.access_control_config import CFG_EXTERNAL_AUTH_USING_SSO, \ - CFG_EXTERNAL_AUTH_LOGOUT_SSO + CFG_EXTERNAL_AUTH_LOGOUT_SSO, CFG_WEB_API_KEY_STATUS from invenio.urlutils import make_canonical_urlargd, create_url, create_html_link from invenio.htmlutils import escape_html, nmtoken_from_string from invenio.messages import gettext_set_language, language_list_long from invenio.websession_config import CFG_WEBSESSION_GROUP_JOIN_POLICY class Template: def tmpl_back_form(self, ln, message, url, link): """ A standard one-message-go-back-link page. Parameters: - 'ln' *string* - The language to display the interface in - 'message' *string* - The message to display - 'url' *string* - The url to go back to - 'link' *string* - The link text """ out = """
    %(message)s %(link)s
    """% { 'message' : message, 'url' : url, 'link' : link, 'ln' : ln } return out def tmpl_external_setting(self, ln, key, value): _ = gettext_set_language(ln) out = """ %s: %s """ % (key, value) return out def tmpl_external_user_settings(self, ln, html_settings): _ = gettext_set_language(ln) out = """

    %(external_user_settings)s

    %(html_settings)s

    %(external_user_groups)s

    %(consult_external_groups)s

    """ % { 'external_user_settings' : _('External account settings'), 'html_settings' : html_settings, 'consult_external_groups' : _('You can consult the list of your external groups directly in the %(x_url_open)sgroups page%(x_url_close)s.') % { 'x_url_open' : '' % ln, 'x_url_close' : '' }, 'external_user_groups' : _('External user groups'), } return out + def tmpl_user_api_key(self, ln=CFG_SITE_LANG, keys_info=None): + """ + Displays all the API key that the user owns the user + + Parameters: + + - 'ln' *string* - The language to display the interface in + - 'key_info' *tuples* - Contains the tuples with the key data (id, desciption, status) + + """ + + # load the right message language + _ = gettext_set_language(ln) + + out = """ + +

    %(user_api_key)s

    + """ % { + 'user_api_key' : _("API keys") + } + + if keys_info and len(keys_info) != 0: + out += "

    %(user_keys)s

    " % {'user_keys': _("These are your current API keys")} + out += """ + + """ + for key_info in keys_info: + out += """ + + + + + + + + + + """ % { + 'key_description': _("Description: " + cgi.escape(key_info[1])), + 'key_status': _("Status: " + key_info[2]), + 'key_id': _(key_info[0]), + 'index': keys_info.index(key_info), + 'key_label': _("API key"), + 'remove_key' : _("Delete key"), + 'sitesecureurl': CFG_SITE_SECURE_URL, + 'input_type': ("submit", "hidden")[key_info[2] == CFG_WEB_API_KEY_STATUS['REVOKED']] + } + out += "
    %(key_description)s%(key_status)s
    + %(key_label)s + %(key_id)s
    +
    + + +
    +
    " + + out += """ +
    +

    %(create_new_key)s

    + + + + +
    +
    + (%(mandatory)s) +
    +
    + %(note)s: + %(new_key_description_note)s + +
    + +
    +
    + """ % { + 'create_new_key' : _("If you want to create a new API key, please enter a description for it"), + 'new_key_description_label' : _("Description for the new API key"), + 'mandatory' : _("mandatory"), + 'note' : _("Note"), + 'new_key_description_note': _("The description should be something meaningful for you to recognize the API key"), + 'create_new_key_button' : _("Create new key"), + 'sitesecureurl': CFG_SITE_SECURE_URL + } + + return out + def tmpl_user_preferences(self, ln, email, email_disabled, password_disabled, nickname): """ Displays a form for the user to change his email/password. Parameters: - 'ln' *string* - The language to display the interface in - 'email' *string* - The email of the user - 'email_disabled' *boolean* - If the user has the right to edit his email - 'password_disabled' *boolean* - If the user has the right to edit his password - 'nickname' *string* - The nickname of the user (empty string if user does not have it) """ # load the right message language _ = gettext_set_language(ln) out = """

    %(edit_params)s

    %(change_user)s


    (%(mandatory)s)
    %(nickname_prefix)s%(nickname)s%(nickname_suffix)s
    %(note)s: %(fixed_nickname_note)s

    (%(mandatory)s)

    %(example)s: john.doe@example.com
       
    """ % { 'change_user' : _("If you want to change your email or set for the first time your nickname, please set new values in the form below."), 'edit_params' : _("Edit login credentials"), 'nickname_label' : _("Nickname"), 'nickname' : nickname, 'nickname_prefix' : nickname=='' and '
    '+_("Example")+':johnd' or '', 'new_email' : _("New email address"), 'mandatory' : _("mandatory"), 'example' : _("Example"), 'note' : _("Note"), 'set_values' : _("Set new values"), 'email' : email, 'email_disabled' : email_disabled and "readonly" or "", 'sitesecureurl': CFG_SITE_SECURE_URL, 'fixed_nickname_note' : _('Since this is considered as a signature for comments and reviews, once set it can not be changed.') } if not password_disabled and not CFG_EXTERNAL_AUTH_USING_SSO: out += """

    %(change_pass)s



    %(note)s: %(old_password_note)s


    %(note)s: %(password_note)s
       
    """ % { 'change_pass' : _("If you want to change your password, please enter the old one and set the new value in the form below."), 'mandatory' : _("mandatory"), 'old_password' : _("Old password"), 'new_password' : _("New password"), 'optional' : _("optional"), 'note' : _("Note"), 'password_note' : _("The password phrase may contain punctuation, spaces, etc."), 'old_password_note' : _("You must fill the old password in order to set a new one."), 'retype_password' : _("Retype password"), 'set_values' : _("Set new password"), 'password_disabled' : password_disabled and "disabled" or "", 'sitesecureurl': CFG_SITE_SECURE_URL, } elif not CFG_EXTERNAL_AUTH_USING_SSO and CFG_CERN_SITE: out += "

    " + _("""If you are using a lightweight CERN account you can %(x_url_open)sreset the password%(x_url_close)s.""") % \ {'x_url_open' : \ '' \ % (make_canonical_urlargd({'email': email, 'returnurl' : CFG_SITE_SECURE_URL + '/youraccount/edit' + make_canonical_urlargd({'lang' : ln}, {})}, {})), 'x_url_close' : ''} + "

    " elif CFG_EXTERNAL_AUTH_USING_SSO and CFG_CERN_SITE: out += "

    " + _("""You can change or reset your CERN account password by means of the %(x_url_open)sCERN account system%(x_url_close)s.""") % \ {'x_url_open' : '', 'x_url_close' : ''} + "

    " return out def tmpl_user_bibcatalog_auth(self, bibcatalog_username="", bibcatalog_password="", ln=CFG_SITE_LANG): """template for setting username and pw for bibcatalog backend""" _ = gettext_set_language(ln) out = """

    %(edit_bibcatalog_settings)s

    %(username)s: %(password)s:
    """ % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'bibcatalog_username' : bibcatalog_username, 'bibcatalog_password' : bibcatalog_password, 'edit_bibcatalog_settings' : _("Edit cataloging interface settings"), 'username' : _("Username"), 'password' : _("Password"), 'update_settings' : _('Update settings') } return out def tmpl_user_lang_edit(self, ln, preferred_lang): _ = gettext_set_language(ln) out = """

    %(edit_lang_settings)s

    """ % { 'select_lang' : _('Select desired language of the web interface.'), 'update_settings' : _('Update settings') } return out def tmpl_user_websearch_edit(self, ln, current = 10, show_latestbox = True, show_helpbox = True): _ = gettext_set_language(ln) out = """

    %(edit_websearch_settings)s

    """ % { 'update_settings' : _("Update settings"), 'select_group_records' : _("Number of search results per page"), } return out def tmpl_user_external_auth(self, ln, methods, current, method_disabled): """ Displays a form for the user to change his authentication method. Parameters: - 'ln' *string* - The language to display the interface in - 'methods' *array* - The methods of authentication - 'method_disabled' *boolean* - If the user has the right to change this - 'current' *string* - The currently selected method """ # load the right message language _ = gettext_set_language(ln) out = """
    %(edit_method)s

    %(explain_method)s:

    %(select_method)s: """ % { 'edit_method' : _("Edit login method"), 'explain_method' : _("Please select which login method you would like to use to authenticate yourself"), 'select_method' : _("Select method"), 'sitesecureurl': CFG_SITE_SECURE_URL, } for system in methods: out += """
    """ % { 'system' : system, 'disabled' : method_disabled and 'disabled="disabled"' or "", 'selected' : current == system and 'checked="checked"' or "", 'id' : nmtoken_from_string(system), } out += """
     
    """ % { 'select_method' : _("Select method"), } return out def tmpl_lost_password_form(self, ln): """ Displays a form for the user to ask for his password sent by email. Parameters: - 'ln' *string* - The language to display the interface in - 'msg' *string* - Explicative message on top of the form. """ # load the right message language _ = gettext_set_language(ln) out = "

    " + _("If you have lost the password for your %(sitename)s %(x_fmt_open)sinternal account%(x_fmt_close)s, then please enter your email address in the following form in order to have a password reset link emailed to you.") % {'x_fmt_open' : '', 'x_fmt_close' : '', 'sitename' : CFG_SITE_NAME_INTL[ln]} + "

    " out += """
     
    """ % { 'ln': ln, 'email' : _("Email address"), 'send' : _("Send password reset link"), } if CFG_CERN_SITE: out += "

    " + _("If you have been using the %(x_fmt_open)sCERN login system%(x_fmt_close)s, then you can recover your password through the %(x_url_open)sCERN authentication system%(x_url_close)s.") % {'x_fmt_open' : '', 'x_fmt_close' : '', 'x_url_open' : '' \ % make_canonical_urlargd({'lf': 'auth', 'returnURL' : CFG_SITE_SECURE_URL + '/youraccount/login?ln='+ln}, {}), 'x_url_close' : ''} + " " else: out += "

    " + _("Note that if you have been using an external login system, then we cannot do anything and you have to ask there.") + " " out += _("Alternatively, you can ask %s to change your login system from external to internal.") % ("""%(email)s""" % { 'email' : CFG_SITE_SUPPORT_EMAIL }) + "

    " return out def tmpl_account_info(self, ln, uid, guest, CFG_CERN_SITE): """ Displays the account information Parameters: - 'ln' *string* - The language to display the interface in - 'uid' *string* - The user id - 'guest' *boolean* - If the user is guest - 'CFG_CERN_SITE' *boolean* - If the site is a CERN site """ # load the right message language _ = gettext_set_language(ln) out = """

    %(account_offer)s

    """ % { 'account_offer' : _("%s offers you the possibility to personalize the interface, to set up your own personal library of documents, or to set up an automatic alert query that would run periodically and would notify you of search results by email.") % CFG_SITE_NAME_INTL[ln], } if not guest: out += """
    %(your_settings)s
    %(change_account)s
    """ % { 'ln' : ln, 'your_settings' : _("Your Settings"), 'change_account' : _("Set or change your account email address or password. Specify your preferences about the look and feel of the interface.") } out += """
    %(your_searches)s
    %(search_explain)s
    """ % { 'ln' : ln, 'your_searches' : _("Your Searches"), 'search_explain' : _("View all the searches you performed during the last 30 days."), } out += """
    %(your_baskets)s
    %(basket_explain)s""" % { 'ln' : ln, 'your_baskets' : _("Your Baskets"), 'basket_explain' : _("With baskets you can define specific collections of items, store interesting records you want to access later or share with others."), } if guest and CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS: out += self.tmpl_warning_guest_user(ln = ln, type = "baskets") out += """
    %(your_alerts)s
    %(explain_alerts)s""" % { 'ln' : ln, 'your_alerts' : _("Your Alerts"), 'explain_alerts' : _("Subscribe to a search which will be run periodically by our service. The result can be sent to you via Email or stored in one of your baskets."), } if guest and CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS: out += self.tmpl_warning_guest_user(type="alerts", ln = ln) out += "
    " if CFG_CERN_SITE: out += """
    %(your_loans)s
    %(explain_loans)s
    """ % { 'your_loans' : _("Your Loans"), 'explain_loans' : _("Check out book you have on loan, submit borrowing requests, etc. Requires CERN ID."), 'ln': ln, 'CFG_SITE_SECURE_URL': CFG_SITE_SECURE_URL } out += """
    """ return out def tmpl_warning_guest_user(self, ln, type): """ Displays a warning message about the specified type Parameters: - 'ln' *string* - The language to display the interface in - 'type' *string* - The type of data that will get lost in case of guest account (for the moment: 'alerts' or 'baskets') """ # load the right message language _ = gettext_set_language(ln) if (type=='baskets'): msg = _("You are logged in as a guest user, so your baskets will disappear at the end of the current session.") + ' ' elif (type=='alerts'): msg = _("You are logged in as a guest user, so your alerts will disappear at the end of the current session.") + ' ' msg += _("If you wish you can %(x_url_open)slogin or register here%(x_url_close)s.") % {'x_url_open': '', 'x_url_close': ''} return """
    %s
    """ % msg def tmpl_account_body(self, ln, user): """ Displays the body of the actions of the user Parameters: - 'ln' *string* - The language to display the interface in - 'user' *string* - The username (nickname or email) """ # load the right message language _ = gettext_set_language(ln) out = _("You are logged in as %(x_user)s. You may want to a) %(x_url1_open)slogout%(x_url1_close)s; b) edit your %(x_url2_open)saccount settings%(x_url2_close)s.") %\ {'x_user': user, 'x_url1_open': '', 'x_url1_close': '', 'x_url2_open': '', 'x_url2_close': '', } return out + "

    " def tmpl_account_template(self, title, body, ln, url): """ Displays a block of the your account page Parameters: - 'ln' *string* - The language to display the interface in - 'title' *string* - The title of the block - 'body' *string* - The body of the block - 'url' *string* - The URL to go to the proper section """ out ="""
    %s
    %s
    """ % (url, title, body) return out def tmpl_account_page(self, ln, warnings, warning_list, accBody, baskets, alerts, searches, messages, loans, groups, submissions, approvals, tickets, administrative): """ Displays the your account page Parameters: - 'ln' *string* - The language to display the interface in - 'accBody' *string* - The body of the heading block - 'baskets' *string* - The body of the baskets block - 'alerts' *string* - The body of the alerts block - 'searches' *string* - The body of the searches block - 'messages' *string* - The body of the messages block - 'groups' *string* - The body of the groups block - 'submissions' *string* - The body of the submission block - 'approvals' *string* - The body of the approvals block - 'administrative' *string* - The body of the administrative block """ # load the right message language _ = gettext_set_language(ln) out = "" if warnings == "1": out += self.tmpl_general_warnings(warning_list) out += self.tmpl_account_template(_("Your Account"), accBody, ln, '/youraccount/edit?ln=%s' % ln) if messages: out += self.tmpl_account_template(_("Your Messages"), messages, ln, '/yourmessages/display?ln=%s' % ln) if loans: out += self.tmpl_account_template(_("Your Loans"), loans, ln, '/yourloans/display?ln=%s' % ln) if baskets: out += self.tmpl_account_template(_("Your Baskets"), baskets, ln, '/yourbaskets/display?ln=%s' % ln) if alerts: out += self.tmpl_account_template(_("Your Alert Searches"), alerts, ln, '/youralerts/list?ln=%s' % ln) if searches: out += self.tmpl_account_template(_("Your Searches"), searches, ln, '/youralerts/display?ln=%s' % ln) if groups: groups_description = _("You can consult the list of %(x_url_open)syour groups%(x_url_close)s you are administering or are a member of.") groups_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Groups"), groups_description, ln, '/yourgroups/display?ln=%s' % ln) if submissions: submission_description = _("You can consult the list of %(x_url_open)syour submissions%(x_url_close)s and inquire about their status.") submission_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Submissions"), submission_description, ln, '/yoursubmissions.py?ln=%s' % ln) if approvals: approval_description = _("You can consult the list of %(x_url_open)syour approvals%(x_url_close)s with the documents you approved or refereed.") approval_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Approvals"), approval_description, ln, '/yourapprovals.py?ln=%s' % ln) #check if this user might have tickets if tickets: ticket_description = _("You can consult the list of %(x_url_open)syour tickets%(x_url_close)s.") ticket_description %= {'x_url_open': '', 'x_url_close': ''} out += self.tmpl_account_template(_("Your Tickets"), ticket_description, ln, '/yourtickets?ln=%s' % ln) if administrative: out += self.tmpl_account_template(_("Your Administrative Activities"), administrative, ln, '/admin') return out def tmpl_account_emailMessage(self, ln, msg): """ Displays a link to retrieve the lost password Parameters: - 'ln' *string* - The language to display the interface in - 'msg' *string* - Explicative message on top of the form. 
""" # load the right message language _ = gettext_set_language(ln) out ="" out +=""" %(msg)s %(try_again)s """ % { 'ln' : ln, 'msg' : msg, 'try_again' : _("Try again") } return out def tmpl_account_reset_password_email_body(self, email, reset_key, ip_address, ln=CFG_SITE_LANG): """ The body of the email that sends lost internal account passwords to users. """ _ = gettext_set_language(ln) out = """ %(intro)s %(intro2)s <%(link)s> %(outro)s %(outro2)s""" % { 'intro': _("Somebody (possibly you) coming from %(x_ip_address)s " "has asked\nfor a password reset at %(x_sitename)s\nfor " "the account \"%(x_email)s\"." % { 'x_sitename' :CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'x_email' : email, 'x_ip_address' : ip_address, } ), 'intro2' : _("If you want to reset the password for this account, please go to:"), 'link' : "%s/youraccount/access%s" % (CFG_SITE_SECURE_URL, make_canonical_urlargd({ 'ln' : ln, 'mailcookie' : reset_key }, {})), 'outro' : _("in order to confirm the validity of this request."), 'outro2' : _("Please note that this URL will remain valid for about %(days)s days only.") % {'days': CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS}, } return out def tmpl_account_address_activation_email_body(self, email, address_activation_key, ip_address, ln=CFG_SITE_LANG): """ The body of the email that sends email address activation cookie passwords to users. """ _ = gettext_set_language(ln) out = """ %(intro)s %(intro2)s <%(link)s> %(outro)s %(outro2)s""" % { 'intro': _("Somebody (possibly you) coming from %(x_ip_address)s " "has asked\nto register a new account at %(x_sitename)s\nfor the " "email address \"%(x_email)s\"." % { 'x_sitename' :CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), 'x_email' : email, 'x_ip_address' : ip_address, } ), 'intro2' : _("If you want to complete this account registration, please go to:"), 'link' : "%s/youraccount/access%s" % (CFG_SITE_SECURE_URL, make_canonical_urlargd({ 'ln' : ln, 'mailcookie' : address_activation_key }, {})), 'outro' : _("in order to confirm the validity of this request."), 'outro2' : _("Please note that this URL will remain valid for about %(days)s days only.") % {'days' : CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS}, } return out def tmpl_account_emailSent(self, ln, email): """ Displays a confirmation message for an email sent Parameters: - 'ln' *string* - The language to display the interface in - 'email' *string* - The email to which the message has been sent """ # load the right message language _ = gettext_set_language(ln) out ="" out += _("Okay, a password reset link has been emailed to %s.") % email return out def tmpl_account_delete(self, ln): """ Displays a confirmation message about deleting the account Parameters: - 'ln' *string* - The language to display the interface in """ # load the right message language _ = gettext_set_language(ln) out = "

    " + _("""Deleting your account""") + '

    ' return out def tmpl_account_logout(self, ln): """ Displays a confirmation message about logging out Parameters: - 'ln' *string* - The language to display the interface in """ # load the right message language _ = gettext_set_language(ln) out = _("You are no longer recognized by our system.") + ' ' if CFG_EXTERNAL_AUTH_USING_SSO and CFG_EXTERNAL_AUTH_LOGOUT_SSO: out += _("""You are still recognized by the centralized %(x_fmt_open)sSSO%(x_fmt_close)s system. You can %(x_url_open)slogout from SSO%(x_url_close)s, too.""") % \ {'x_fmt_open' : '', 'x_fmt_close' : '', 'x_url_open' : '' % CFG_EXTERNAL_AUTH_LOGOUT_SSO, 'x_url_close' : ''} out += '
    ' out += _("If you wish you can %(x_url_open)slogin here%(x_url_close)s.") % \ {'x_url_open': '', 'x_url_close': ''} return out def tmpl_login_form(self, ln, referer, internal, register_available, methods, selected_method, msg=None): """ Displays a login form Parameters: - 'ln' *string* - The language to display the interface in - 'referer' *string* - The referer URL - will be redirected upon after login - 'internal' *boolean* - If we are producing an internal authentication - 'register_available' *boolean* - If users can register freely in the system - 'methods' *array* - The available authentication methods - 'selected_method' *string* - The default authentication method - 'msg' *string* - The message to print before the form, if needed """ # load the right message language _ = gettext_set_language(ln) if msg is "": out = "

    %(please_login)s

    " % { 'please_login' : cgi.escape(_("If you already have an account, please login using the form below.")) } if CFG_CERN_SITE: out += "

    " + _("If you don't own a CERN account yet, you can register a %(x_url_open)snew CERN lightweight account%(x_url_close)s.") % {'x_url_open' : '', 'x_url_close' : ''} + "

    " else: if register_available: out += "

    "+_("If you don't own an account yet, please %(x_url_open)sregister%(x_url_close)s an internal account.") %\ {'x_url_open': '', 'x_url_close': ''} + "

    " else: # users cannot register accounts, so advise them # how to get one, or be silent about register # facility if account level is more than 4: if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 5: out += "

    " + _("If you don't own an account yet, please contact %s.") % ('%s' % (cgi.escape(CFG_SITE_SUPPORT_EMAIL, True), cgi.escape(CFG_SITE_SUPPORT_EMAIL))) + "

    " else: out = "

    %s

    " % msg out += """
    """ if len(methods) > 1: # more than one method, must make a select login_select = """" out += """ """ % { 'login_title' : cgi.escape(_("Login method:")), 'login_select' : login_select, } else: # only one login method available out += """""" % cgi.escape(methods[0], True) out += """
    %(login_select)s
    """ % { 'ln': cgi.escape(ln, True), 'referer' : cgi.escape(referer, True), 'username' : cgi.escape(_("Username")), 'password' : cgi.escape(_("Password")), 'remember_me' : cgi.escape(_("Remember login on this computer.")), 'login' : cgi.escape(_("login")), } if internal: out += """   (%(lost_pass)s)""" % { 'ln' : cgi.escape(ln, True), 'lost_pass' : cgi.escape(_("Lost your password?")) } out += """
    """ out += """

    %(note)s: %(note_text)s

    """ % { 'note' : cgi.escape(_("Note")), 'note_text': cgi.escape(_("You can use your nickname or your email address to login."))} return out def tmpl_lost_your_password_teaser(self, ln=CFG_SITE_LANG): """Displays a short sentence to attract user to the fact that maybe he lost his password. Used by the registration page. """ _ = gettext_set_language(ln) out = "" out += """%(maybe_lost_pass)s""" % { 'ln' : ln, 'maybe_lost_pass': ("Maybe you have lost your password?") } return out def tmpl_reset_password_form(self, ln, email, reset_key, msg=''): """Display a form to reset the password.""" _ = gettext_set_language(ln) out = "" out = "

    %s

    " % _("Your request is valid. Please set the new " "desired password in the following form.") if msg: out += """

    %s

    """ % msg out += """
    %(set_password_for)s:%(email)s
    """ % { 'ln' : ln, 'reset_key' : reset_key, 'email' : email, 'set_password_for' : _('Set a new password for'), 'type_new_password' : _('Type the new password'), 'type_it_again' : _('Type again the new password'), 'set_new_password' : _('Set the new password') } return out def tmpl_register_page(self, ln, referer, level): """ Displays a login form Parameters: - 'ln' *string* - The language to display the interface in - 'referer' *string* - The referer URL - will be redirected upon after login - 'level' *int* - Login level (0 - all access, 1 - accounts activated, 2+ - no self-registration) """ # load the right message language _ = gettext_set_language(ln) out = "" if level <= 1: out += _("Please enter your email address and desired nickname and password:") if level == 1: out += _("It will not be possible to use the account before it has been verified and activated.") out += """

    (%(mandatory)s)

    %(example)s: john.doe@example.com

    (%(mandatory)s)

    %(example)s: johnd

    (%(optional)s)

    %(note)s: %(password_contain)s

    %(note)s: %(explain_acc)s""" % { 'referer' : cgi.escape(referer), 'ln' : cgi.escape(ln), 'email_address' : _("Email address"), 'nickname' : _("Nickname"), 'password' : _("Password"), 'mandatory' : _("mandatory"), 'optional' : _("optional"), 'example' : _("Example"), 'note' : _("Note"), 'password_contain' : _("The password phrase may contain punctuation, spaces, etc."), 'retype' : _("Retype Password"), 'register' : _("register"), 'explain_acc' : _("Please do not use valuable passwords such as your Unix, AFS or NICE passwords with this service. Your email address will stay strictly confidential and will not be disclosed to any third party. It will be used to identify you for personal services of %s. For example, you may set up an automatic alert search that will look for new preprints and will notify you daily of new arrivals by email.") % CFG_SITE_NAME, } else: # level >=2, so users cannot register accounts out += "

    " + _("It is not possible to create an account yourself. Contact %s if you want an account.") % ('%s' % (CFG_SITE_SUPPORT_EMAIL, CFG_SITE_SUPPORT_EMAIL)) + "

    " return out def tmpl_account_adminactivities(self, ln, uid, guest, roles, activities): """ Displays the admin activities block for this user Parameters: - 'ln' *string* - The language to display the interface in - 'uid' *string* - The used id - 'guest' *boolean* - If the user is guest - 'roles' *array* - The current user roles - 'activities' *array* - The user allowed activities """ # load the right message language _ = gettext_set_language(ln) out = "" # guest condition if guest: return _("You seem to be a guest user. You have to %(x_url_open)slogin%(x_url_close)s first.") % \ {'x_url_open': '', 'x_url_close': ''} # no rights condition if not roles: return "

    " + _("You are not authorized to access administrative functions.") + "

    " # displaying form out += "

    " + _("You are enabled to the following roles: %(x_role)s.") % {'x_role': ('' + ", ".join(roles) + "")} + '

    ' if activities: # print proposed links: activities.sort(lambda x, y: cmp(x.lower(), y.lower())) tmp_out = '' for action in activities: if action == "runbibedit": tmp_out += """
       
    %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run Record Editor")) if action == "runbibeditmulti": tmp_out += """
        %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run Multi-Record Editor")) if action == "runbibcirculation": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Run BibCirculation")) if action == "runbibmerge": tmp_out += """
        %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run Record Merger")) if action == "runbibswordclient": tmp_out += """
        %s""" % (CFG_SITE_URL, CFG_SITE_RECORD, _("Run BibSword Client")) if action == "runbatchuploader": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Run Batch Uploader")) if action == "cfgbibformat": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure BibFormat")) tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure BibKnowledge")) if action == "cfgoaiharvest": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure OAI Harvest")) if action == "cfgoairepository": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure OAI Repository")) if action == "cfgbibindex": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure BibIndex")) if action == "cfgbibrank": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure BibRank")) if action == "cfgwebaccess": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure WebAccess")) if action == "cfgwebcomment": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure WebComment")) if action == "cfgweblinkback": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure WebLinkback")) if action == "cfgwebjournal": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure WebJournal")) if action == "cfgwebsearch": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure WebSearch")) if action == "cfgwebsubmit": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure WebSubmit")) if action == "runbibdocfile": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Run Document File Manager")) if action == "cfgbibsort": tmp_out += """
        %s""" % (CFG_SITE_URL, ln, _("Configure BibSort")) if tmp_out: out += _("Here are some interesting web admin links for you:") + tmp_out out += "
    " + _("For more admin-level activities, see the complete %(x_url_open)sAdmin Area%(x_url_close)s.") %\ {'x_url_open': '', 'x_url_close': ''} return out def tmpl_create_userinfobox(self, ln, url_referer, guest, username, submitter, referee, admin, usebaskets, usemessages, usealerts, usegroups, useloans, usestats): """ Displays the user block Parameters: - 'ln' *string* - The language to display the interface in - 'url_referer' *string* - URL of the page being displayed - 'guest' *boolean* - If the user is guest - 'username' *string* - The username (nickname or email) - 'submitter' *boolean* - If the user is submitter - 'referee' *boolean* - If the user is referee - 'admin' *boolean* - If the user is admin - 'usebaskets' *boolean* - If baskets are enabled for the user - 'usemessages' *boolean* - If messages are enabled for the user - 'usealerts' *boolean* - If alerts are enabled for the user - 'usegroups' *boolean* - If groups are enabled for the user - 'useloans' *boolean* - If loans are enabled for the user - 'usestats' *boolean* - If stats are enabled for the user @note: with the update of CSS classes (cds.cds -> invenio.css), the variables useloans etc are not used in this function, since they are in the menus. But we keep them in the function signature for backwards compatibility. """ # load the right message language _ = gettext_set_language(ln) out = """ """ % CFG_SITE_URL if guest: out += """%(guest_msg)s :: %(login)s""" % { 'sitesecureurl': CFG_SITE_SECURE_URL, 'ln' : ln, 'guest_msg' : _("guest"), 'referer' : url_referer and ('&referer=%s' % urllib.quote(url_referer)) or '', 'login' : _('login') } else: out += """ %(username)s :: """ % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'ln' : ln, 'username' : username } out += """%(logout)s""" % { 'sitesecureurl' : CFG_SITE_SECURE_URL, 'ln' : ln, 'logout' : _("logout"), } return out def tmpl_create_useractivities_menu(self, ln, selected, url_referer, guest, username, submitter, referee, admin, usebaskets, usemessages, usealerts, usegroups, useloans, usestats): """ Returns the main navigation menu with actions based on user's priviledges @param ln: The language to display the interface in @type ln: string @param selected: If the menu is currently selected @type selected: boolean @param url_referer: URL of the page being displayed @type url_referer: string @param guest: If the user is guest @type guest: string @param username: The username (nickname or email) @type username: string @param submitter: If the user is submitter @type submitter: boolean @param referee: If the user is referee @type referee: boolean @param admin: If the user is admin @type admin: boolean @param usebaskets: If baskets are enabled for the user @type usebaskets: boolean @param usemessages: If messages are enabled for the user @type usemessages: boolean @param usealerts: If alerts are enabled for the user @type usealerts: boolean @param usegroups: If groups are enabled for the user @type usegroups: boolean @param useloans: If loans are enabled for the user @type useloans: boolean @param usestats: If stats are enabled for the user @type usestats: boolean @return: html menu of the user activities @rtype: string """ # load the right message language _ = gettext_set_language(ln) out = '''
    %(personalize)s
      ''' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'personalize': _("Personalize"), 'on': selected and " on" or '', 'selected': selected and "selected" or '' } if not guest: out += '
    • %(account)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'account' : _('Your account') } if usealerts or guest: out += '
    • %(alerts)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'alerts' : _('Your alerts') } if referee: out += '
    • %(approvals)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'approvals' : _('Your approvals') } if usebaskets or guest: out += '
    • %(baskets)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'baskets' : _('Your baskets') } if usegroups: out += '
    • %(groups)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'groups' : _('Your groups') } if useloans: out += '
    • %(loans)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'loans' : _('Your loans') } if usemessages: out += '
    • %(messages)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'messages' : _('Your messages') } if submitter: out += '
    • %(submissions)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'submissions' : _('Your submissions') } if usealerts or guest: out += '
    • %(searches)s
    • ' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'searches' : _('Your searches') } out += '
    ' return out def tmpl_create_adminactivities_menu(self, ln, selected, url_referer, guest, username, submitter, referee, admin, usebaskets, usemessages, usealerts, usegroups, useloans, usestats, activities): """ Returns the main navigation menu with actions based on user's privileges @param ln: The language to display the interface in @type ln: string @param selected: If the menu is currently selected @type selected: boolean @param url_referer: URL of the page being displayed @type url_referer: string @param guest: If the user is guest @type guest: boolean @param username: The username (nickname or email) @type username: string @param submitter: If the user is submitter @type submitter: boolean @param referee: If the user is referee @type referee: boolean @param admin: If the user is admin @type admin: boolean @param usebaskets: If baskets are enabled for the user @type usebaskets: boolean @param usemessages: If messages are enabled for the user @type usemessages: boolean @param usealerts: If alerts are enabled for the user @type usealerts: boolean @param usegroups: If groups are enabled for the user @type usegroups: boolean @param useloans: If loans are enabled for the user @type useloans: boolean @param usestats: If stats are enabled for the user @type usestats: boolean @param activities: dictionary of admin activities @type activities: dict @return: html menu of the user activities @rtype: string """ # load the right message language _ = gettext_set_language(ln) out = '' if activities: out += '''
    %(admin)s
      ''' % { 'CFG_SITE_SECURE_URL' : CFG_SITE_SECURE_URL, 'ln' : ln, 'admin': _("Administration"), 'on': selected and " on" or '', 'selected': selected and "selected" or '' } for name in sorted(activities.iterkeys()): url = activities[name] out += '
    • %(name)s
    • ' % { 'url': url, 'name': name } if usestats: out += """
    • %(stats)s
    • """ % { 'CFG_SITE_URL' : CFG_SITE_URL, 'ln' : ln, 'stats' : _("Statistics"), } out += '
    ' return out def tmpl_warning(self, warnings, ln=CFG_SITE_LANG): """ Display len(warnings) warning fields @param warnings: list of strings @param ln: language @return: html output """ if not((type(warnings) is list) or (type(warnings) is tuple)): warnings = [warnings] warningbox = "" if warnings != []: warningbox = "
    \n Warning:\n" for warning in warnings: lines = warning.split("\n") warningbox += "

    " for line in lines[0:-1]: warningbox += line + "
    \n" warningbox += lines[-1] + "

    " warningbox += "

    \n" return warningbox def tmpl_error(self, error, ln=CFG_SITE_LANG): """ Display error @param error: string @param ln=language @return: html output """ _ = gettext_set_language(ln) errorbox = "" if error != "": errorbox = "
    \n Error:\n" errorbox += "

    " errorbox += error + "

    " errorbox += "

    \n" return errorbox def tmpl_display_all_groups(self, infos, admin_group_html, member_group_html, external_group_html = None, warnings=[], ln=CFG_SITE_LANG): """ Displays the 3 tables of groups: admin, member and external Parameters: - 'ln' *string* - The language to display the interface in - 'admin_group_html' *string* - HTML code for displaying all the groups the user is the administrator of - 'member_group_html' *string* - HTML code for displaying all the groups the user is member of - 'external_group_html' *string* - HTML code for displaying all the external groups the user is member of """ _ = gettext_set_language(ln) group_text = self.tmpl_infobox(infos) group_text += self.tmpl_warning(warnings) if external_group_html: group_text += """
    %s

    %s

    %s
    """ %(admin_group_html, member_group_html, external_group_html) else: group_text += """
    %s

    %s
    """ %(admin_group_html, member_group_html) return group_text def tmpl_display_admin_groups(self, groups, ln=CFG_SITE_LANG): """ Display the groups the user is admin of. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - All the group the user is admin of - 'infos' *list* - Display infos on top of admin group table """ _ = gettext_set_language(ln) img_link = """ %(text)s
    %(text)s
    """ out = self.tmpl_group_table_title(img="/img/group_admin.png", text=_("You are an administrator of the following groups:") ) out += """ """ %(_("Group"), _("Description")) if len(groups) == 0: out += """ """ %(_("You are not an administrator of any groups."),) for group_data in groups: (grpID, name, description) = group_data edit_link = img_link % {'siteurl' : CFG_SITE_URL, 'grpID' : grpID, 'ln': ln, 'img':"webbasket_create_small.png", 'text':_("Edit group"), 'action':"edit" } members_link = img_link % {'siteurl' : CFG_SITE_URL, 'grpID' : grpID, 'ln': ln, 'img':"webbasket_usergroup.png", 'text':_("Edit %s members") % '', 'action':"members" } out += """ """ % (cgi.escape(name), cgi.escape(description), edit_link, members_link) out += """
    %s %s    
    %s
    %s %s %s %s
         
    """ % {'ln': ln, 'write_label': _("Create new group"), } return out def tmpl_display_member_groups(self, groups, ln=CFG_SITE_LANG): """ Display the groups the user is member of. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - All the group the user is member of """ _ = gettext_set_language(ln) group_text = self.tmpl_group_table_title(img="/img/webbasket_us.png", text=_("You are a member of the following groups:")) group_text += """ """ % (_("Group"), _("Description")) if len(groups) == 0: group_text += """ """ %(_("You are not a member of any groups."),) for group_data in groups: (id, name, description) = group_data group_text += """ """ % (cgi.escape(name), cgi.escape(description)) group_text += """
    %s %s
    %s
    %s %s
    """ % {'ln': ln, 'join_label': _("Join new group"), 'leave_label':_("Leave group") } return group_text def tmpl_display_external_groups(self, groups, ln=CFG_SITE_LANG): """ Display the external groups the user is member of. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - All the group the user is member of """ _ = gettext_set_language(ln) group_text = self.tmpl_group_table_title(img="/img/webbasket_us.png", text=_("You are a member of the following external groups:")) group_text += """ """ % (_("Group"), _("Description")) if len(groups) == 0: group_text += """ """ %(_("You are not a member of any external groups."),) for group_data in groups: (id, name, description) = group_data group_text += """ """ % (cgi.escape(name), cgi.escape(description)) group_text += """
    %s %s
    %s
    %s %s
    """ return group_text def tmpl_display_input_group_info(self, group_name, group_description, join_policy, act_type="create", grpID=None, warnings=[], ln=CFG_SITE_LANG): """ Display group data when creating or updating a group: Name, description, join_policy. Parameters: - 'ln' *string* - The language to display the interface in - 'group_name' *string* - name of the group - 'group_description' *string* - description of the group - 'join_policy' *string* - join policy - 'act_type' *string* - info about action : create or edit(update) - 'grpID' *int* - ID of the group(not None in case of group editing) - 'warnings' *list* - Display warning if values are not correct """ _ = gettext_set_language(ln) #default hidden_id ="" form_name = "create_group" action = CFG_SITE_URL + '/yourgroups/create' button_label = _("Create new group") button_name = "create_button" label = _("Create new group") delete_text = "" if act_type == "update": form_name = "update_group" action = CFG_SITE_URL + '/yourgroups/edit' button_label = _("Update group") button_name = "update" label = _('Edit group %s') % cgi.escape(group_name) delete_text = """""" delete_text %= (_("Delete group"),"delete") if grpID is not None: hidden_id = """""" hidden_id %= grpID out = self.tmpl_warning(warnings) out += """
    %(label)s %(label)s
    %(join_policy_label)s %(join_policy)s
    %(hidden_id)s
    %(delete_text)s
    """ out %= {'action' : action, 'logo': CFG_SITE_URL + '/img/webbasket_create.png', 'label': label, 'form_name' : form_name, 'name_label': _("Group name:"), 'delete_text': delete_text, 'description_label': _("Group description:"), 'join_policy_label': _("Group join policy:"), 'group_name': cgi.escape(group_name, 1), 'group_description': cgi.escape(group_description, 1), 'button_label': button_label, 'button_name':button_name, 'cancel_label':_("Cancel"), 'hidden_id':hidden_id, 'ln': ln, 'join_policy' :self.__create_join_policy_selection_menu("join_policy", join_policy, ln) } return out def tmpl_display_input_join_group(self, group_list, group_name, group_from_search, search, warnings=[], ln=CFG_SITE_LANG): """ Display the groups the user can join. He can use default select list or the search box Parameters: - 'ln' *string* - The language to display the interface in - 'group_list' *list* - All the group the user can join - 'group_name' *string* - Name of the group the user is looking for - 'group_from search' *list* - List of the group the user can join matching group_name - 'search' *int* - User is looking for group using group_name - 'warnings' *list* - Display warning if two group are selected """ _ = gettext_set_language(ln) out = self.tmpl_warning(warnings) search_content = "" if search: search_content = """ """ if group_from_search != []: search_content += self.__create_select_menu('grpID', group_from_search, _("Please select:")) else: search_content += _("No matching group") search_content += """ """ out += """
    %(label)s %(label)s
    %(search_content)s
    %(list_label)s %(group_list)s  



    """ out %= {'action' : CFG_SITE_URL + '/yourgroups/join', 'logo': CFG_SITE_URL + '/img/webbasket_create.png', 'label': _("Join group"), 'group_name': cgi.escape(group_name, 1), 'label2':_("or find it") + ': ', 'list_label':_("Choose group:"), 'ln': ln, 'find_label': _("Find group"), 'cancel_label':_("Cancel"), 'group_list' :self.__create_select_menu("grpID",group_list, _("Please select:")), 'search_content' : search_content } return out def tmpl_display_manage_member(self, grpID, group_name, members, pending_members, infos=[], warnings=[], ln=CFG_SITE_LANG): """Display current members and waiting members of a group. Parameters: - 'ln' *string* - The language to display the interface in - 'grpID *int* - ID of the group - 'group_name' *string* - Name of the group - 'members' *list* - List of the current members - 'pending_members' *list* - List of the waiting members - 'infos' *tuple of 2 lists* - Message to inform user about his last action - 'warnings' *list* - Display warning if two group are selected """ _ = gettext_set_language(ln) out = self.tmpl_warning(warnings) out += self.tmpl_infobox(infos) out += """

    %(title)s

    %(img_alt_header1)s %(header1)s
     
    %(member_text)s
    %(img_alt_header2)s %(header2)s
     
    %(pending_text)s
    %(img_alt_header3)s %(header3)s
     
    %(invite_text)s
    """ if members : member_list = self.__create_select_menu("member_id", members, _("Please select:")) member_text = """ %s """ % (member_list,_("Remove member")) else : member_text = """%s""" % _("No members.") if pending_members : pending_list = self.__create_select_menu("pending_member_id", pending_members, _("Please select:")) pending_text = """ %s """ % (pending_list,_("Accept member"), _("Reject member")) else : pending_text = """%s""" % _("No members awaiting approval.") header1 = self.tmpl_group_table_title(text=_("Current members")) header2 = self.tmpl_group_table_title(text=_("Members awaiting approval")) header3 = _("Invite new members") write_a_message_url = create_url( "%s/yourmessages/write" % CFG_SITE_URL, { 'ln' : ln, 'msg_subject' : _('Invitation to join "%s" group' % escape_html(group_name)), 'msg_body' : _("""\ Hello: I think you might be interested in joining the group "%(x_name)s". You can join by clicking here: %(x_url)s. Best regards. """) % {'x_name': group_name, 'x_url': create_html_link("%s/yourgroups/join" % CFG_SITE_URL, { 'grpID' : grpID, 'join_button' : "1", }, link_label=group_name, escape_urlargd=True, escape_linkattrd=True)}}) link_open = '' % escape_html(write_a_message_url) invite_text = _("If you want to invite new members to join your group, please use the %(x_url_open)sweb message%(x_url_close)s system.") % \ {'x_url_open': link_open, 'x_url_close': ''} action = CFG_SITE_URL + '/yourgroups/members?ln=' + ln out %= {'title':_('Group: %s') % escape_html(group_name), 'member_text' : member_text, 'pending_text' :pending_text, 'action':action, 'grpID':grpID, 'header1': header1, 'header2': header2, 'header3': header3, 'img_alt_header1': _("Current members"), 'img_alt_header2': _("Members awaiting approval"), 'img_alt_header3': _("Invite new members"), 'invite_text': invite_text, 'imgurl': CFG_SITE_URL + '/img', 'cancel_label':_("Cancel"), 'ln':ln } return out def tmpl_display_input_leave_group(self, groups, warnings=[], ln=CFG_SITE_LANG): """Display groups the user can leave. Parameters: - 'ln' *string* - The language to display the interface in - 'groups' *list* - List of groups the user is currently member of - 'warnings' *list* - Display warning if no group is selected """ _ = gettext_set_language(ln) out = self.tmpl_warning(warnings) out += """
    %(label)s %(label)s
    %(list_label)s %(groups)s  
    %(submit)s
    """ if groups: groups = self.__create_select_menu("grpID", groups, _("Please select:")) list_label = _("Group list") submit = """""" % _("Leave group") else : groups = _("You are not member of any group.") list_label = "" submit = "" action = CFG_SITE_URL + '/yourgroups/leave?ln=%s' action %= (ln) out %= {'groups' : groups, 'list_label' : list_label, 'action':action, 'logo': CFG_SITE_URL + '/img/webbasket_create.png', 'label' : _("Leave group"), 'cancel_label':_("Cancel"), 'ln' :ln, 'submit' : submit } return out def tmpl_confirm_delete(self, grpID, ln=CFG_SITE_LANG): """ display a confirm message when deleting a group @param grpID *int* - ID of the group @param ln: language @return: html output """ _ = gettext_set_language(ln) action = CFG_SITE_URL + '/yourgroups/edit' out = """
    %(message)s
    """% {'message': _("Are you sure you want to delete this group?"), 'ln':ln, 'yes_label': _("Yes"), 'no_label': _("No"), 'grpID':grpID, 'action': action } return out def tmpl_confirm_leave(self, uid, grpID, ln=CFG_SITE_LANG): """ display a confirm message @param grpID *int* - ID of the group @param ln: language @return: html output """ _ = gettext_set_language(ln) action = CFG_SITE_URL + '/yourgroups/leave' out = """
    %(message)s
    """% {'message': _("Are you sure you want to leave this group?"), 'ln':ln, 'yes_label': _("Yes"), 'no_label': _("No"), 'grpID':grpID, 'action': action } return out def __create_join_policy_selection_menu(self, name, current_join_policy, ln=CFG_SITE_LANG): """Private function. create a drop down menu for selection of join policy @param current_join_policy: join policy as defined in CFG_WEBSESSION_GROUP_JOIN_POLICY @param ln: language """ _ = gettext_set_language(ln) elements = [(CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEOPEN'], _("Visible and open for new members")), (CFG_WEBSESSION_GROUP_JOIN_POLICY['VISIBLEMAIL'], _("Visible but new members need approval")) ] select_text = _("Please select:") return self.__create_select_menu(name, elements, select_text, selected_key=current_join_policy) def __create_select_menu(self, name, elements, select_text, multiple=0, selected_key=None): """ private function, returns a popup menu @param name: name of HTML control @param elements: list of (key, value) """ if multiple : out = """ """ % name out += '' % (select_text) for (key, label) in elements: selected = '' if key == selected_key: selected = ' selected="selected"' out += ''% (key, selected, label) out += '' return out def tmpl_infobox(self, infos, ln=CFG_SITE_LANG): """Display len(infos) information fields @param infos: list of strings @param ln=language @return: html output """ _ = gettext_set_language(ln) if not((type(infos) is list) or (type(infos) is tuple)): infos = [infos] infobox = "" for info in infos: infobox += '
    ' lines = info.split("\n") for line in lines[0:-1]: infobox += line + "
    \n" infobox += lines[-1] + "
    \n" return infobox def tmpl_navtrail(self, ln=CFG_SITE_LANG, title=""): """ display the navtrail, e.g.: Your account > Your group > title @param title: the last part of the navtrail. Is not a link @param ln: language return html formatted navtrail """ _ = gettext_set_language(ln) nav_h1 = '%s' nav_h2 = "" if (title != ""): nav_h2 = ' > %s' nav_h2 = nav_h2 % (CFG_SITE_URL, _("Your Groups")) return nav_h1 % (CFG_SITE_URL, _("Your Account")) + nav_h2 def tmpl_group_table_title(self, img="", text="", ln=CFG_SITE_LANG): """ display the title of a table: - 'img' *string* - img path - 'text' *string* - title - 'ln' *string* - The language to display the interface in """ out = "
    " if img: out += """ """ % (CFG_SITE_URL + img) out += """ %s
    """ % text return out def tmpl_admin_msg(self, group_name, grpID, ln=CFG_SITE_LANG): """ return message content for joining group - 'group_name' *string* - name of the group - 'grpID' *int* - ID of the group - 'ln' *string* - The language to display the interface in """ _ = gettext_set_language(ln) subject = _("Group %s: New membership request") % group_name url = CFG_SITE_URL + "/yourgroups/members?grpID=%s&ln=%s" url %= (grpID, ln) # FIXME: which user? We should show his nickname. body = (_("A user wants to join the group %s.") % group_name) + '
    ' body += _("Please %(x_url_open)saccept or reject%(x_url_close)s this user's request.") % {'x_url_open': '', 'x_url_close': ''} body += '
    ' return subject, body def tmpl_member_msg(self, group_name, accepted=0, ln=CFG_SITE_LANG): """ return message content when new member is accepted/rejected - 'group_name' *string* - name of the group - 'accepted' *int* - 1 if new membership has been accepted, 0 if it has been rejected - 'ln' *string* - The language to display the interface in """ _ = gettext_set_language(ln) if accepted: subject = _("Group %s: Join request has been accepted") % (group_name) body = _("Your request for joining group %s has been accepted.") % (group_name) else: subject = _("Group %s: Join request has been rejected") % (group_name) body = _("Your request for joining group %s has been rejected.") % (group_name) url = CFG_SITE_URL + "/yourgroups/display?ln=" + ln body += '
    ' body += _("You can consult the list of %(x_url_open)syour groups%(x_url_close)s.") % {'x_url_open': '', 'x_url_close': ''} body += '
    ' return subject, body def tmpl_delete_msg(self, group_name, ln=CFG_SITE_LANG): """ return message content when a group is deleted - 'group_name' *string* - name of the group - 'ln' *string* - The language to display the interface in """ _ = gettext_set_language(ln) subject = _("Group %s has been deleted") % group_name url = CFG_SITE_URL + "/yourgroups/display?ln=" + ln body = _("Group %s has been deleted by its administrator.") % group_name body += '
    ' body += _("You can consult the list of %(x_url_open)syour groups%(x_url_close)s.") % {'x_url_open': '', 'x_url_close': ''} body += '
    ' return subject, body def tmpl_group_info(self, nb_admin_groups=0, nb_member_groups=0, nb_total_groups=0, ln=CFG_SITE_LANG): """ display infos about groups (used by myaccount.py) @param nb_admin_groups: number of groups the user is admin of @param nb_member_groups: number of groups the user is member of @param nb_total_groups: number of groups the user belongs to @param ln: language @return: html output. """ _ = gettext_set_language(ln) out = _("You can consult the list of %(x_url_open)s%(x_nb_total)i groups%(x_url_close)s you are subscribed to (%(x_nb_member)i) or administering (%(x_nb_admin)i).") out %= {'x_url_open': '', 'x_nb_total': nb_total_groups, 'x_url_close': '', 'x_nb_admin': nb_admin_groups, 'x_nb_member': nb_member_groups} return out def tmpl_general_warnings(self, warning_list, ln=CFG_SITE_LANG): """ display information to the admin user about possible security problems in the system. """ message = "" _ = gettext_set_language(ln) #Try and connect to the mysql database with the default invenio password if "warning_mysql_password_equal_to_invenio_password" in warning_list: message += "

    " message += _("Warning: The password set for MySQL root user is the same as the default Invenio password. For security purposes, you may want to change the password.") message += "

    " #Try and connect to the invenio database with the default invenio password if "warning_invenio_password_equal_to_default" in warning_list: message += "

    " message += _("Warning: The password set for the Invenio MySQL user is the same as the shipped default. For security purposes, you may want to change the password.") message += "

    " #Check if the admin password is empty if "warning_empty_admin_password" in warning_list: message += "

    " message += _("Warning: The password set for the Invenio admin user is currently empty. For security purposes, it is strongly recommended that you add a password.") message += "

    " #Check if the admin email has been changed from the default if "warning_site_support_email_equal_to_default" in warning_list: message += "

    " message += _("Warning: The email address set for support email is currently set to info@invenio-software.org. It is recommended that you change this to your own address.") message += "

    " #Check for a new release if "note_new_release_available" in warning_list: message += "

    " message += _("A newer version of Invenio is available for download. You may want to visit ") message += "http://invenio-software.org/wiki/Installation/Download" message += "

    " #Error downloading release notes if "error_cannot_download_release_notes" in warning_list: message += "

    " message += _("Cannot download or parse release notes from http://invenio-software.org/repo/invenio/tree/RELEASE-NOTES") message += "

    " return message diff --git a/modules/websession/lib/websession_webinterface.py b/modules/websession/lib/websession_webinterface.py index 5468a1175..2e2cb11b2 100644 --- a/modules/websession/lib/websession_webinterface.py +++ b/modules/websession/lib/websession_webinterface.py @@ -1,1381 +1,1415 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. from invenio.webstat import register_customevent """Invenio ACCOUNT HANDLING""" __revision__ = "$Id$" __lastupdated__ = """$Date$""" import cgi from datetime import timedelta import os from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_SITE_URL, \ CFG_CERN_SITE, \ CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS, \ CFG_OPENAIRE_SITE from invenio import webuser from invenio.webpage import page from invenio import webaccount from invenio import webbasket from invenio import webalert from invenio.dbquery import run_sql from invenio.webmessage import account_new_mail from invenio.access_control_engine import acc_authorize_action from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.webinterface_handler_config import SERVER_RETURN, HTTP_NOT_FOUND from invenio.urlutils import redirect_to_url, make_canonical_urlargd from invenio import webgroup from invenio import webgroup_dblayer from invenio.messages import gettext_set_language, wash_language from invenio.mailutils import send_email +from invenio.errorlib import register_exception from invenio.access_control_mailcookie import mail_cookie_retrieve_kind, \ mail_cookie_check_pw_reset, mail_cookie_delete_cookie, \ mail_cookie_create_pw_reset, mail_cookie_check_role, \ mail_cookie_check_mail_activation, InvenioWebAccessMailCookieError, \ InvenioWebAccessMailCookieDeletedError, mail_cookie_check_authorize_action from invenio.access_control_config import CFG_WEBACCESS_WARNING_MSGS, \ CFG_EXTERNAL_AUTH_USING_SSO, CFG_EXTERNAL_AUTH_LOGOUT_SSO, \ CFG_EXTERNAL_AUTHENTICATION, CFG_EXTERNAL_AUTH_SSO_REFRESH +from invenio import web_api_key + + import invenio.template websession_templates = invenio.template.load('websession') bibcatalog_templates = invenio.template.load('bibcatalog') + + class WebInterfaceYourAccountPages(WebInterfaceDirectory): _exports = ['', 'edit', 'change', 'lost', 'display', 'send_email', 'youradminactivities', 'access', 'delete', 'logout', 'login', 'register', 'resetpassword', - 'robotlogin', 'robotlogout'] + 'robotlogin', 'robotlogout', 'apikey'] _force_https = True def index(self, req, form): redirect_to_url(req, '%s/youraccount/display' % CFG_SITE_SECURE_URL) def access(self, req, form): args = 
wash_urlargd(form, {'mailcookie' : (str, '')}) _ = gettext_set_language(args['ln']) title = _("Mail Cookie Service") try: kind = mail_cookie_retrieve_kind(args['mailcookie']) if kind == 'pw_reset': redirect_to_url(req, '%s/youraccount/resetpassword?k=%s&ln=%s' % (CFG_SITE_SECURE_URL, args['mailcookie'], args['ln'])) elif kind == 'role': uid = webuser.getUid(req) try: (role_name, expiration) = mail_cookie_check_role(args['mailcookie'], uid) except InvenioWebAccessMailCookieDeletedError: return page(title=_("Role authorization request"), req=req, body=_("This request for an authorization has already been authorized."), uid=webuser.getUid(req), navmenuid='youraccount', language=args['ln'], secure_page_p=1) return page(title=title, body=webaccount.perform_back( _("You have successfully obtained an authorization as %(x_role)s! " "This authorization will last until %(x_expiration)s and until " "you close your browser if you are a guest user.") % {'x_role' : '%s' % role_name, 'x_expiration' : '%s' % expiration.strftime("%Y-%m-%d %H:%M:%S")}, '/youraccount/display?ln=%s' % args['ln'], _('login'), args['ln']), req=req, uid=webuser.getUid(req), language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount', secure_page_p=1) elif kind == 'mail_activation': try: email = mail_cookie_check_mail_activation(args['mailcookie']) if not email: raise StandardError webuser.confirm_email(email) body = "
<p>" + _("You have confirmed the validity of your email"
                             " address!") + "</p>"
                if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
                    body += "<p>" + _("Please, wait for the administrator to "
                                      "enable your account.") + "</p>"
                else:
                    uid = webuser.update_Uid(req, email)
                    body += "<p>" + _("You can now go to %(x_url_open)syour account page%(x_url_close)s.") % {
                        'x_url_open': '<a href="/youraccount/display?ln=%s">' % args['ln'],
                        'x_url_close': '</a>'} + "</p>"
                return page(title=_("Email address successfully activated"),
                            body=body, req=req, language=args['ln'],
                            uid=webuser.getUid(req),
                            lastupdated=__lastupdated__,
                            navmenuid='youraccount', secure_page_p=1)
            except InvenioWebAccessMailCookieDeletedError, e:
                body = "<p>" + _("You have already confirmed the validity of your email address!") + "</p>"
                if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
                    body += "<p>" + _("Please, wait for the administrator to "
                                      "enable your account.") + "</p>"
                else:
                    body += "<p>" + _("You can now go to %(x_url_open)syour account page%(x_url_close)s.") % {
                        'x_url_open': '<a href="/youraccount/display?ln=%s">' % args['ln'],
                        'x_url_close': '</a>'} + "</p>
    " return page(title=_("Email address successfully activated"), body=body, req=req, language=args['ln'], uid=webuser.getUid(req), lastupdated=__lastupdated__, navmenuid='youraccount', secure_page_p=1) return webuser.page_not_authorized(req, "../youraccount/access", text=_("This request for confirmation of an email " "address is not valid or" " is expired."), navmenuid='youraccount') except InvenioWebAccessMailCookieError: return webuser.page_not_authorized(req, "../youraccount/access", text=_("This request for an authorization is not valid or" " is expired."), navmenuid='youraccount') def resetpassword(self, req, form): args = wash_urlargd(form, { 'k' : (str, ''), 'reset' : (int, 0), 'password' : (str, ''), 'password2' : (str, '') }) _ = gettext_set_language(args['ln']) title = _('Reset password') reset_key = args['k'] try: email = mail_cookie_check_pw_reset(reset_key) except InvenioWebAccessMailCookieDeletedError: return page(title=title, req=req, body=_("This request for resetting a password has already been used."), uid=webuser.getUid(req), navmenuid='youraccount', language=args['ln'], secure_page_p=1) except InvenioWebAccessMailCookieError: return webuser.page_not_authorized(req, "../youraccount/access", text=_("This request for resetting a password is not valid or" " is expired."), navmenuid='youraccount') if email is None or CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 3: return webuser.page_not_authorized(req, "../youraccount/resetpassword", text=_("This request for resetting the password is not valid or" " is expired."), navmenuid='youraccount') if not args['reset']: return page(title=title, body=webaccount.perform_reset_password(args['ln'], email, reset_key), req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') elif args['password'] != args['password2']: msg = _('The two provided passwords aren\'t equal.') return page(title=title, body=webaccount.perform_reset_password(args['ln'], email, reset_key, msg), req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') run_sql('UPDATE user SET password=AES_ENCRYPT(email,%s) WHERE email=%s', (args['password'], email)) mail_cookie_delete_cookie(reset_key) return page(title=title, body=webaccount.perform_back( _("The password was successfully set! 
" "You can now proceed with the login."), '/youraccount/login?ln=%s' % args['ln'], _('login'), args['ln']), req=req, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount', secure_page_p=1) def display(self, req, form): args = wash_urlargd(form, {}) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/display", navmenuid='youraccount') if webuser.isGuestUser(uid): return page(title=_("Your Account"), body=webaccount.perform_info(req, args['ln']), description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') username = webuser.get_nickname_or_email(uid) user_info = webuser.collect_user_info(req) bask = user_info['precached_usebaskets'] and webbasket.account_list_baskets(uid, ln=args['ln']) or '' aler = user_info['precached_usealerts'] and webalert.account_list_alerts(uid, ln=args['ln']) or '' sear = webalert.account_list_searches(uid, ln=args['ln']) msgs = user_info['precached_usemessages'] and account_new_mail(uid, ln=args['ln']) or '' grps = user_info['precached_usegroups'] and webgroup.account_group(uid, ln=args['ln']) or '' appr = user_info['precached_useapprove'] sbms = user_info['precached_viewsubmissions'] loan = '' admn = webaccount.perform_youradminactivities(user_info, args['ln']) return page(title=_("Your Account"), body=webaccount.perform_display_account(req, username, bask, aler, sear, msgs, loan, grps, sbms, appr, admn, args['ln']), description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') + def apikey(self, req, form): + args = wash_urlargd(form, { + 'key_description' : (str, None), + 'key_id' : (str, None), + 'referer': (str, '') + }) + uid = webuser.getUid(req) + # load the right message language + _ = gettext_set_language(args['ln']) + + if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: + return webuser.page_not_authorized(req, "../youraccount/edit", + navmenuid='youraccount') + if webuser.isGuestUser(uid): + return webuser.page_not_authorized(req, "../youraccount/edit", + text=_("This functionality is forbidden to guest users."), + navmenuid='youraccount') + + if args['key_id']: + web_api_key.mark_web_api_key_as_removed(args['key_id']) + else: + uid = webuser.getUid(req) + web_api_key.create_new_web_api_key(uid, args['key_description']) + + if args['referer']: + redirect_to_url(req, args['referer']) + else: + redirect_to_url(req, '%s/youraccount/edit?ln=%s' % (CFG_SITE_SECURE_URL, args['ln'])) def edit(self, req, form): args = wash_urlargd(form, {"verbose" : (int, 0)}) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/edit", navmenuid='youraccount') if webuser.isGuestUser(uid): return webuser.page_not_authorized(req, "../youraccount/edit", text=_("This functionality is forbidden to guest users."), navmenuid='youraccount') body = '' user_info = webuser.collect_user_info(req) if args['verbose'] == 9: keys 
= user_info.keys() keys.sort() for key in keys: body += "%s:%s
    " % (key, user_info[key]) #check if the user should see bibcatalog user name / passwd in the settings can_config_bibcatalog = (acc_authorize_action(user_info, 'runbibedit')[0] == 0) return page(title= _("Your Settings"), body=body+webaccount.perform_set(webuser.get_email(uid), args['ln'], can_config_bibcatalog, verbose=args['verbose']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description=_("%s Personalize, Your Settings") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def change(self, req, form): args = wash_urlargd(form, { 'nickname': (str, None), 'email': (str, None), 'old_password': (str, None), 'password': (str, None), 'password2': (str, None), 'login_method': (str, ""), 'group_records' : (int, None), 'latestbox' : (int, None), 'helpbox' : (int, None), 'lang' : (str, None), 'bibcatalog_username' : (str, None), 'bibcatalog_password' : (str, None), }) ## Wash arguments: args['login_method'] = wash_login_method(args['login_method']) if args['email']: args['email'] = args['email'].lower() ## Load the right message language: _ = gettext_set_language(args['ln']) ## Identify user and load old preferences: uid = webuser.getUid(req) prefs = webuser.get_user_preferences(uid) ## Check rights: if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/change", navmenuid='youraccount') # FIXME: the branching below is far from optimal. Should be # based on the submitted form name ids, to know precisely on # which form the user clicked. Not on the passed values, as # is the case now. The function body is too big and in bad # need of refactoring anyway. ## Will hold the output messages: mess = '' ## Would hold link to previous page and title for the link: act = None linkname = None title = None ## Change login method if needed: if args['login_method'] and CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 4 \ and args['login_method'] in CFG_EXTERNAL_AUTHENTICATION: title = _("Settings edited") act = "/youraccount/display?ln=%s" % args['ln'] linkname = _("Show account") if prefs['login_method'] != args['login_method']: if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 4: mess += '
<p>' + _("Unable to change login method.")
                elif not CFG_EXTERNAL_AUTHENTICATION[args['login_method']]:
                    # Switching to internal authentication: we drop any external data
                    p_email = webuser.get_email(uid)
                    webuser.drop_external_settings(uid)
                    webgroup_dblayer.drop_external_groups(uid)
                    prefs['login_method'] = args['login_method']
                    webuser.set_user_preferences(uid, prefs)
                    mess += "<p>
    " + _("Switched to internal login method.") + " " mess += _("Please note that if this is the first time that you are using this account " "with the internal login method then the system has set for you " "a randomly generated password. Please click the " "following button to obtain a password reset request " "link sent to you via email:") + '

    ' mess += """

    """ % (p_email, _("Send Password")) else: res = run_sql("SELECT email FROM user WHERE id=%s", (uid,)) if res: email = res[0][0] else: email = None if not email: mess += '

    ' + _("Unable to switch to external login method %s, because your email address is unknown.") % cgi.escape(args['login_method']) else: try: if not CFG_EXTERNAL_AUTHENTICATION[args['login_method']].user_exists(email): mess += '

    ' + _("Unable to switch to external login method %s, because your email address is unknown to the external login system.") % cgi.escape(args['login_method']) else: prefs['login_method'] = args['login_method'] webuser.set_user_preferences(uid, prefs) mess += '

    ' + _("Login method successfully selected.") except AttributeError: mess += '

    ' + _("The external login method %s does not support email address based logins. Please contact the site administrators.") % cgi.escape(args['login_method']) ## Change email or nickname: if args['email'] or args['nickname']: uid2 = webuser.emailUnique(args['email']) uid_with_the_same_nickname = webuser.nicknameUnique(args['nickname']) current_nickname = webuser.get_nickname(uid) if current_nickname and args['nickname'] and \ current_nickname != args['nickname']: # User tried to set nickname while one is already # defined (policy is that nickname is not to be # changed) mess += '

    ' + _("Your nickname has not been updated") elif (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2 or (CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS <= 1 and \ webuser.email_valid_p(args['email']))) \ and (args['nickname'] is None or webuser.nickname_valid_p(args['nickname'])) \ and uid2 != -1 and (uid2 == uid or uid2 == 0) \ and uid_with_the_same_nickname != -1 and (uid_with_the_same_nickname == uid or uid_with_the_same_nickname == 0): if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 3: change = webuser.updateDataUser(uid, args['email'], args['nickname']) else: return webuser.page_not_authorized(req, "../youraccount/change", navmenuid='youraccount') if change: mess += '

    ' + _("Settings successfully edited.") mess += '

    ' + _("Note that if you have changed your email address, " "you will have to %(x_url_open)sreset your password%(x_url_close)s anew.") % \ {'x_url_open': '' % (CFG_SITE_SECURE_URL + '/youraccount/lost?ln=%s' % args['ln']), 'x_url_close': ''} act = "/youraccount/display?ln=%s" % args['ln'] linkname = _("Show account") title = _("Settings edited") elif args['nickname'] is not None and not webuser.nickname_valid_p(args['nickname']): mess += '

    ' + _("Desired nickname %s is invalid.") % cgi.escape(args['nickname']) mess += " " + _("Please try again.") act = "/youraccount/edit?ln=%s" % args['ln'] linkname = _("Edit settings") title = _("Editing settings failed") elif not webuser.email_valid_p(args['email']): mess += '

    ' + _("Supplied email address %s is invalid.") % cgi.escape(args['email']) mess += " " + _("Please try again.") act = "/youraccount/edit?ln=%s" % args['ln'] linkname = _("Edit settings") title = _("Editing settings failed") elif uid2 == -1 or uid2 != uid and not uid2 == 0: mess += '

    ' + _("Supplied email address %s already exists in the database.") % cgi.escape(args['email']) mess += " " + websession_templates.tmpl_lost_your_password_teaser(args['ln']) mess += " " + _("Or please try again.") act = "/youraccount/edit?ln=%s" % args['ln'] linkname = _("Edit settings") title = _("Editing settings failed") elif uid_with_the_same_nickname == -1 or uid_with_the_same_nickname != uid and not uid_with_the_same_nickname == 0: mess += '

    ' + _("Desired nickname %s is already in use.") % cgi.escape(args['nickname']) mess += " " + _("Please try again.") act = "/youraccount/edit?ln=%s" % args['ln'] linkname = _("Edit settings") title = _("Editing settings failed") ## Change passwords: if args['old_password'] or args['password'] or args['password2']: if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 3: mess += '

    ' + _("Users cannot edit passwords on this site.") else: res = run_sql("SELECT id FROM user " "WHERE AES_ENCRYPT(email,%s)=password AND id=%s", (args['old_password'], uid)) if res: if args['password'] == args['password2']: webuser.updatePasswordUser(uid, args['password']) mess += '

    ' + _("Password successfully edited.") act = "/youraccount/display?ln=%s" % args['ln'] linkname = _("Show account") title = _("Password edited") else: mess += '

    ' + _("Both passwords must match.") mess += " " + _("Please try again.") act = "/youraccount/edit?ln=%s" % args['ln'] linkname = _("Edit settings") title = _("Editing password failed") else: mess += '

    ' + _("Wrong old password inserted.") mess += " " + _("Please try again.") act = "/youraccount/edit?ln=%s" % args['ln'] linkname = _("Edit settings") title = _("Editing password failed") ## Change search-related settings: if args['group_records']: prefs = webuser.get_user_preferences(uid) prefs['websearch_group_records'] = args['group_records'] prefs['websearch_latestbox'] = args['latestbox'] prefs['websearch_helpbox'] = args['helpbox'] webuser.set_user_preferences(uid, prefs) title = _("Settings edited") act = "/youraccount/display?ln=%s" % args['ln'] linkname = _("Show account") mess += '

    ' + _("User settings saved correctly.") ## Change language-related settings: if args['lang']: lang = wash_language(args['lang']) prefs = webuser.get_user_preferences(uid) prefs['language'] = lang args['ln'] = lang _ = gettext_set_language(lang) webuser.set_user_preferences(uid, prefs) title = _("Settings edited") act = "/youraccount/display?ln=%s" % args['ln'] linkname = _("Show account") mess += '

    ' + _("User settings saved correctly.") ## Edit cataloging-related settings: if args['bibcatalog_username'] or args['bibcatalog_password']: act = "/youraccount/display?ln=%s" % args['ln'] linkname = _("Show account") if ((len(args['bibcatalog_username']) == 0) or (len(args['bibcatalog_password']) == 0)): title = _("Editing bibcatalog authorization failed") mess += '

    ' + _("Empty username or password") else: title = _("Settings edited") prefs['bibcatalog_username'] = args['bibcatalog_username'] prefs['bibcatalog_password'] = args['bibcatalog_password'] webuser.set_user_preferences(uid, prefs) mess += '

    ' + _("User settings saved correctly.") if not mess: mess = _("Unable to update settings.") if not act: act = "/youraccount/edit?ln=%s" % args['ln'] if not linkname: linkname = _("Edit settings") if not title: title = _("Editing settings failed") ## Finally, output the results: return page(title=title, body=webaccount.perform_back(mess, act, linkname, args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def lost(self, req, form): args = wash_urlargd(form, {}) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/lost", navmenuid='youraccount') return page(title=_("Lost your password?"), body=webaccount.perform_lost(args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def send_email(self, req, form): # set all the declared query fields as local variables args = wash_urlargd(form, {'p_email': (str, None)}) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/send_email", navmenuid='youraccount') user_prefs = webuser.get_user_preferences(webuser.emailUnique(args['p_email'])) if user_prefs: if user_prefs['login_method'] in CFG_EXTERNAL_AUTHENTICATION and \ CFG_EXTERNAL_AUTHENTICATION[user_prefs['login_method']] is not None: eMsg = _("Cannot send password reset request since you are using external authentication system.") return page(title=_("Your Account"), body=webaccount.perform_emailMessage(eMsg, args['ln']), description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME)), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') try: reset_key = mail_cookie_create_pw_reset(args['p_email'], cookie_timeout=timedelta(days=CFG_WEBSESSION_RESET_PASSWORD_EXPIRE_IN_DAYS)) except InvenioWebAccessMailCookieError: reset_key = None if reset_key is None: eMsg = _("The entered email address does not exist in the database.") return page(title=_("Your Account"), body=webaccount.perform_emailMessage(eMsg, args['ln']), description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') ip_address = req.remote_host or req.remote_ip if not send_email(CFG_SITE_SUPPORT_EMAIL, args['p_email'], "%s %s" % (_("Password reset request for"), CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME)), websession_templates.tmpl_account_reset_password_email_body( args['p_email'],reset_key, ip_address, 
args['ln'])): eMsg = _("The entered email address is incorrect, please check that it is written correctly (e.g. johndoe@example.com).") return page(title=_("Incorrect email address"), body=webaccount.perform_emailMessage(eMsg, args['ln']), description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') return page(title=_("Reset password link sent"), body=webaccount.perform_emailSent(args['p_email'], args['ln']), description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def youradminactivities(self, req, form): args = wash_urlargd(form, {}) uid = webuser.getUid(req) user_info = webuser.collect_user_info(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/youradminactivities", navmenuid='admin') return page(title=_("Your Administrative Activities"), body=webaccount.perform_youradminactivities(user_info, args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='admin') def delete(self, req, form): args = wash_urlargd(form, {}) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/delete", navmenuid='youraccount') return page(title=_("Delete Account"), body=webaccount.perform_delete(args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def logout(self, req, form): args = wash_urlargd(form, {}) uid = webuser.logoutUser(req) # load the right message language _ = gettext_set_language(args['ln']) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../youraccount/logout", navmenuid='youraccount') if CFG_EXTERNAL_AUTH_USING_SSO: return redirect_to_url(req, CFG_EXTERNAL_AUTH_LOGOUT_SSO) return page(title=_("Logout"), body=webaccount.perform_logout(req, args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords=_("%s, personalize") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def robotlogout(self, req, form): """ Implement logout method for external service providers. 
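        On OpenAIRE installations the user is redirected back to the
        portal (CFG_OPENAIRE_PORTAL_URL); otherwise the user is sent to
        CFG_SITE_SECURE_URL/img/pix.png, a small image that an external
        provider can embed in its own logout page so that the Invenio
        session is closed invisibly.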
""" webuser.logoutUser(req) if CFG_OPENAIRE_SITE: from invenio.config import CFG_OPENAIRE_PORTAL_URL redirect_to_url(req, CFG_OPENAIRE_PORTAL_URL) else: redirect_to_url(req, "%s/img/pix.png" % CFG_SITE_SECURE_URL) def robotlogin(self, req, form): """ Implement authentication method for external service providers. """ from invenio.external_authentication import InvenioWebAccessExternalAuthError args = wash_urlargd(form, { 'login_method': (str, None), 'remember_me' : (str, ''), 'referer': (str, ''), 'p_un': (str, ''), 'p_pw': (str, '') }) # sanity checks: args['login_method'] = wash_login_method(args['login_method']) args['remember_me'] = args['remember_me'] != '' locals().update(args) if CFG_ACCESS_CONTROL_LEVEL_SITE > 0: return webuser.page_not_authorized(req, "../youraccount/login?ln=%s" % args['ln'], navmenuid='youraccount') uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) try: (iden, args['p_un'], args['p_pw'], msgcode) = webuser.loginUser(req, args['p_un'], args['p_pw'], args['login_method']) except InvenioWebAccessExternalAuthError, err: return page("Error", body=str(err)) if len(iden)>0: uid = webuser.update_Uid(req, args['p_un'], args['remember_me']) uid2 = webuser.getUid(req) if uid2 == -1: webuser.logoutUser(req) return webuser.page_not_authorized(req, "../youraccount/login?ln=%s" % args['ln'], uid=uid, navmenuid='youraccount') # login successful! if args['referer']: if CFG_OPENAIRE_SITE and args['referer'].startswith('https://openaire.cern.ch/deposit'): ## HACK for OpenAIRE args['referer'] = args['referer'].replace('https://openaire.cern.ch/deposit', 'http://openaire.cern.ch/deposit') redirect_to_url(req, args['referer']) else: return self.display(req, form) else: mess = CFG_WEBACCESS_WARNING_MSGS[msgcode] % cgi.escape(args['login_method']) if msgcode == 14: if webuser.username_exists_p(args['p_un']): mess = CFG_WEBACCESS_WARNING_MSGS[15] % cgi.escape(args['login_method']) act = '/youraccount/login%s' % make_canonical_urlargd({'ln' : args['ln'], 'referer' : args['referer']}, {}) return page(title=_("Login"), body=webaccount.perform_back(mess, act, _("login"), args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def login(self, req, form): args = wash_urlargd(form, { 'p_un': (str, None), 'p_pw': (str, None), 'login_method': (str, None), 'action': (str, ''), 'remember_me' : (str, ''), 'referer': (str, '')}) if CFG_OPENAIRE_SITE: from invenio.config import CFG_OPENAIRE_PORTAL_URL if CFG_OPENAIRE_PORTAL_URL: from invenio.urlutils import create_url from base64 import encodestring invenio_loginurl = args['referer'] or '%s/youraccount/display?ln=%s' % (CFG_SITE_SECURE_URL, args['ln']) loginurl = create_url(CFG_OPENAIRE_PORTAL_URL, {"option": "com_openaire", "view": "login", "return": encodestring(invenio_loginurl)}) redirect_to_url(req, loginurl) # sanity checks: args['login_method'] = wash_login_method(args['login_method']) if args['p_un']: args['p_un'] = args['p_un'].strip() args['remember_me'] = args['remember_me'] != '' locals().update(args) if CFG_ACCESS_CONTROL_LEVEL_SITE > 0: return webuser.page_not_authorized(req, "../youraccount/login?ln=%s" % args['ln'], navmenuid='youraccount') uid = webuser.getUid(req) # 
load the right message language _ = gettext_set_language(args['ln']) if args['action']: cookie = args['action'] try: action, arguments = mail_cookie_check_authorize_action(cookie) except InvenioWebAccessMailCookieError: pass if not CFG_EXTERNAL_AUTH_USING_SSO: if args['p_un'] is None or not args['login_method']: return page(title=_("Login"), body=webaccount.create_login_page_box(args['referer'], args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p=1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') (iden, args['p_un'], args['p_pw'], msgcode) = webuser.loginUser(req, args['p_un'], args['p_pw'], args['login_method']) else: # Fake parameters for p_un & p_pw because SSO takes them from the environment (iden, args['p_un'], args['p_pw'], msgcode) = webuser.loginUser(req, '', '', CFG_EXTERNAL_AUTH_USING_SSO) args['remember_me'] = False if len(iden)>0: uid = webuser.update_Uid(req, args['p_un'], args['remember_me']) uid2 = webuser.getUid(req) if uid2 == -1: webuser.logoutUser(req) return webuser.page_not_authorized(req, "../youraccount/login?ln=%s" % args['ln'], uid=uid, navmenuid='youraccount') # login successful! try: register_customevent("login", [req.remote_host or req.remote_ip, uid, args['p_un']]) except: register_exception(suffix="Do the webstat tables exists? Try with 'webstatadmin --load-config'") if args['referer']: redirect_to_url(req, args['referer'].replace(CFG_SITE_URL, CFG_SITE_SECURE_URL)) else: return self.display(req, form) else: mess = CFG_WEBACCESS_WARNING_MSGS[msgcode] % cgi.escape(args['login_method']) if msgcode == 14: if webuser.username_exists_p(args['p_un']): mess = CFG_WEBACCESS_WARNING_MSGS[15] % cgi.escape(args['login_method']) act = '/youraccount/login%s' % make_canonical_urlargd({'ln' : args['ln'], 'referer' : args['referer']}, {}) return page(title=_("Login"), body=webaccount.perform_back(mess, act, _("login"), args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description="%s Personalize, Main page" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') def register(self, req, form): args = wash_urlargd(form, { 'p_nickname': (str, None), 'p_email': (str, None), 'p_pw': (str, None), 'p_pw2': (str, None), 'action': (str, "login"), 'referer': (str, "")}) if CFG_ACCESS_CONTROL_LEVEL_SITE > 0: return webuser.page_not_authorized(req, "../youraccount/register?ln=%s" % args['ln'], navmenuid='youraccount') uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(args['ln']) if args['p_nickname'] is None or args['p_email'] is None: return page(title=_("Register"), body=webaccount.create_register_page_box(args['referer'], args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description=_("%s Personalize, Main page") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') mess = "" act = "" if args['p_pw'] == 
args['p_pw2']: ruid = webuser.registerUser(req, args['p_email'], args['p_pw'], args['p_nickname'], ln=args['ln']) else: ruid = -2 if ruid == 0: mess = _("Your account has been successfully created.") title = _("Account created") if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT == 1: mess += " " + _("In order to confirm its validity, an email message containing an account activation key has been sent to the given email address.") mess += " " + _("Please follow instructions presented there in order to complete the account registration process.") if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1: mess += " " + _("A second email will be sent when the account has been activated and can be used.") elif CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT != 1: uid = webuser.update_Uid(req, args['p_email']) mess += " " + _("You can now access your %(x_url_open)saccount%(x_url_close)s.") %\ {'x_url_open': '', 'x_url_close': ''} elif ruid == -2: mess = _("Both passwords must match.") mess += " " + _("Please try again.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") elif ruid == 1: mess = _("Supplied email address %s is invalid.") % cgi.escape(args['p_email']) mess += " " + _("Please try again.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") elif ruid == 2: mess = _("Desired nickname %s is invalid.") % cgi.escape(args['p_nickname']) mess += " " + _("Please try again.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") elif ruid == 3: mess = _("Supplied email address %s already exists in the database.") % cgi.escape(args['p_email']) mess += " " + websession_templates.tmpl_lost_your_password_teaser(args['ln']) mess += " " + _("Or please try again.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") elif ruid == 4: mess = _("Desired nickname %s already exists in the database.") % cgi.escape(args['p_nickname']) mess += " " + _("Please try again.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") elif ruid == 5: mess = _("Users cannot register themselves, only admin can register them.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") elif ruid == 6: mess = _("The site is having troubles in sending you an email for confirming your email address.") + _("The error has been logged and will be taken in consideration as soon as possible.") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") else: # this should never happen mess = _("Internal Error") act = "/youraccount/register?ln=%s" % args['ln'] title = _("Registration failure") return page(title=title, body=webaccount.perform_back(mess,act, _("register"), args['ln']), navtrail="""""" % (CFG_SITE_SECURE_URL, args['ln']) + _("Your Account") + """""", description=_("%s Personalize, Main page") % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), keywords="%s , personalize" % CFG_SITE_NAME_INTL.get(args['ln'], CFG_SITE_NAME), uid=uid, req=req, secure_page_p = 1, language=args['ln'], lastupdated=__lastupdated__, navmenuid='youraccount') class WebInterfaceYourTicketsPages(WebInterfaceDirectory): #support for /yourtickets url _exports = ['', 'display'] def __call__(self, req, form): #if there is no trailing slash self.index(req, form) def index(self, req, form): #take all the parameters.. 
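        # Illustrative note: everything after '?' in the original request
        # URI is kept and re-appended below, so that e.g.
        # /yourtickets?start=5&ln=en redirects to
        # /yourtickets/display?start=5&ln=en and paging/language survive.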
unparsed_uri = req.unparsed_uri qstr = "" if unparsed_uri.count('?') > 0: dummy, qstr = unparsed_uri.split('?') qstr = '?'+qstr redirect_to_url(req, '/yourtickets/display'+qstr) def display(self, req, form): #show tickets for this user argd = wash_urlargd(form, {'ln': (str, ''), 'start': (int, 1) }) uid = webuser.getUid(req) ln = argd['ln'] start = argd['start'] _ = gettext_set_language(ln) body = bibcatalog_templates.tmpl_your_tickets(uid, ln, start) return page(title=_("Your tickets"), body=body, navtrail="""""" % (CFG_SITE_SECURE_URL, argd['ln']) + _("Your Account") + """""", uid=uid, req=req, language=argd['ln'], lastupdated=__lastupdated__, secure_page_p=1) class WebInterfaceYourGroupsPages(WebInterfaceDirectory): _exports = ['', 'display', 'create', 'join', 'leave', 'edit', 'members'] def index(self, req, form): redirect_to_url(req, '/yourgroups/display') def display(self, req, form): """ Displays groups the user is admin of and the groups the user is member of(but not admin) @param ln: language @return: the page for all the groups """ argd = wash_urlargd(form, {}) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(argd['ln']) if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../yourgroups/display", navmenuid='yourgroups') user_info = webuser.collect_user_info(req) if not user_info['precached_usegroups']: return webuser.page_not_authorized(req, "../", \ text = _("You are not authorized to use groups.")) body = webgroup.perform_request_groups_display(uid=uid, ln=argd['ln']) return page(title = _("Your Groups"), body = body, navtrail = webgroup.get_navtrail(argd['ln']), uid = uid, req = req, language = argd['ln'], lastupdated = __lastupdated__, navmenuid = 'yourgroups', secure_page_p = 1) def create(self, req, form): """create(): interface for creating a new group @param group_name: : name of the new webgroup.Must be filled @param group_description: : description of the new webgroup.(optionnal) @param join_policy: : join policy of the new webgroup.Must be chosen @param *button: which button was pressed @param ln: language @return: the compose page Create group """ argd = wash_urlargd(form, {'group_name': (str, ""), 'group_description': (str, ""), 'join_policy': (str, ""), 'create_button':(str, ""), 'cancel':(str, "") }) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(argd['ln']) if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../yourgroups/create", navmenuid='yourgroups') user_info = webuser.collect_user_info(req) if not user_info['precached_usegroups']: return webuser.page_not_authorized(req, "../", \ text = _("You are not authorized to use groups.")) if argd['cancel']: url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s' url %= argd['ln'] redirect_to_url(req, url) if argd['create_button'] : body= webgroup.perform_request_create_group(uid=uid, group_name=argd['group_name'], group_description=argd['group_description'], join_policy=argd['join_policy'], ln = argd['ln']) else: body = webgroup.perform_request_input_create_group(group_name=argd['group_name'], group_description=argd['group_description'], join_policy=argd['join_policy'], ln=argd['ln']) title = _("Create new group") return page(title = title, body = body, navtrail = webgroup.get_navtrail(argd['ln'], title), uid = uid, req = req, language = argd['ln'], lastupdated = __lastupdated__, navmenuid = 'yourgroups', 
secure_page_p = 1) def join(self, req, form): """join(): interface for joining a new group @param grpID: : list of the group the user wants to become a member. The user must select only one group. @param group_name: : will search for groups matching group_name @param *button: which button was pressed @param ln: language @return: the compose page Join group """ argd = wash_urlargd(form, {'grpID':(list, []), 'group_name':(str, ""), 'find_button':(str, ""), 'join_button':(str, ""), 'cancel':(str, "") }) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(argd['ln']) if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../yourgroups/join", navmenuid='yourgroups') user_info = webuser.collect_user_info(req) if not user_info['precached_usegroups']: return webuser.page_not_authorized(req, "../", \ text = _("You are not authorized to use groups.")) if argd['cancel']: url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s' url %= argd['ln'] redirect_to_url(req, url) if argd['join_button']: search = 0 if argd['group_name']: search = 1 body = webgroup.perform_request_join_group(uid, argd['grpID'], argd['group_name'], search, argd['ln']) else: search = 0 if argd['find_button']: search = 1 body = webgroup.perform_request_input_join_group(uid, argd['group_name'], search, ln=argd['ln']) title = _("Join New Group") return page(title = title, body = body, navtrail = webgroup.get_navtrail(argd['ln'], title), uid = uid, req = req, language = argd['ln'], lastupdated = __lastupdated__, navmenuid = 'yourgroups', secure_page_p = 1) def leave(self, req, form): """leave(): interface for leaving a group @param grpID: : group the user wants to leave. @param group_name: : name of the group the user wants to leave @param *button: which button was pressed @param confirmed: : the user is first asked to confirm @param ln: language @return: the compose page Leave group """ argd = wash_urlargd(form, {'grpID':(int, 0), 'group_name':(str, ""), 'leave_button':(str, ""), 'cancel':(str, ""), 'confirmed': (int, 0) }) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(argd['ln']) if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../yourgroups/leave", navmenuid='yourgroups') user_info = webuser.collect_user_info(req) if not user_info['precached_usegroups']: return webuser.page_not_authorized(req, "../", \ text = _("You are not authorized to use groups.")) if argd['cancel']: url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s' url %= argd['ln'] redirect_to_url(req, url) if argd['leave_button']: body = webgroup.perform_request_leave_group(uid, argd['grpID'], argd['confirmed'], argd['ln']) else: body = webgroup.perform_request_input_leave_group(uid=uid, ln=argd['ln']) title = _("Leave Group") return page(title = title, body = body, navtrail = webgroup.get_navtrail(argd['ln'], title), uid = uid, req = req, language = argd['ln'], lastupdated = __lastupdated__, navmenuid = 'yourgroups', secure_page_p = 1) def edit(self, req, form): """edit(): interface for editing group @param grpID: : group ID @param group_name: : name of the new webgroup.Must be filled @param group_description: : description of the new webgroup.(optionnal) @param join_policy: : join policy of the new webgroup.Must be chosen @param update: button update group pressed @param delete: button delete group pressed @param cancel: button cancel pressed @param confirmed: : 
the user is first asked to confirm before deleting @param ln: language @return: the main page displaying all the groups """ argd = wash_urlargd(form, {'grpID': (int, 0), 'update': (str, ""), 'cancel': (str, ""), 'delete': (str, ""), 'group_name': (str, ""), 'group_description': (str, ""), 'join_policy': (str, ""), 'confirmed': (int, 0) }) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(argd['ln']) if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../yourgroups/display", navmenuid='yourgroups') user_info = webuser.collect_user_info(req) if not user_info['precached_usegroups']: return webuser.page_not_authorized(req, "../", \ text = _("You are not authorized to use groups.")) if argd['cancel']: url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s' url %= argd['ln'] redirect_to_url(req, url) elif argd['delete']: body = webgroup.perform_request_delete_group(uid=uid, grpID=argd['grpID'], confirmed=argd['confirmed']) elif argd['update']: body = webgroup.perform_request_update_group(uid= uid, grpID=argd['grpID'], group_name=argd['group_name'], group_description=argd['group_description'], join_policy=argd['join_policy'], ln=argd['ln']) else : body= webgroup.perform_request_edit_group(uid=uid, grpID=argd['grpID'], ln=argd['ln']) title = _("Edit Group") return page(title = title, body = body, navtrail = webgroup.get_navtrail(argd['ln'], title), uid = uid, req = req, language = argd['ln'], lastupdated = __lastupdated__, navmenuid = 'yourgroups', secure_page_p = 1) def members(self, req, form): """member(): interface for managing members of a group @param grpID: : group ID @param add_member: button add_member pressed @param remove_member: button remove_member pressed @param reject_member: button reject__member pressed @param delete: button delete group pressed @param member_id: : ID of the existing member selected @param pending_member_id: : ID of the pending member selected @param cancel: button cancel pressed @param info: : info about last user action @param ln: language @return: the same page with data updated """ argd = wash_urlargd(form, {'grpID': (int, 0), 'cancel': (str, ""), 'add_member': (str, ""), 'remove_member': (str, ""), 'reject_member': (str, ""), 'member_id': (int, 0), 'pending_member_id': (int, 0) }) uid = webuser.getUid(req) # load the right message language _ = gettext_set_language(argd['ln']) if uid == -1 or webuser.isGuestUser(uid) or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return webuser.page_not_authorized(req, "../yourgroups/display", navmenuid='yourgroups') user_info = webuser.collect_user_info(req) if not user_info['precached_usegroups']: return webuser.page_not_authorized(req, "../", \ text = _("You are not authorized to use groups.")) if argd['cancel']: url = CFG_SITE_SECURE_URL + '/yourgroups/display?ln=%s' url %= argd['ln'] redirect_to_url(req, url) if argd['remove_member']: body = webgroup.perform_request_remove_member(uid=uid, grpID=argd['grpID'], member_id=argd['member_id'], ln=argd['ln']) elif argd['reject_member']: body = webgroup.perform_request_reject_member(uid=uid, grpID=argd['grpID'], user_id=argd['pending_member_id'], ln=argd['ln']) elif argd['add_member']: body = webgroup.perform_request_add_member(uid=uid, grpID=argd['grpID'], user_id=argd['pending_member_id'], ln=argd['ln']) else: body= webgroup.perform_request_manage_member(uid=uid, grpID=argd['grpID'], ln=argd['ln']) title = _("Edit group members") return page(title = title, body = body, navtrail = 
webgroup.get_navtrail(argd['ln'], title), uid = uid, req = req, language = argd['ln'], lastupdated = __lastupdated__, navmenuid = 'yourgroups', secure_page_p = 1) def wash_login_method(login_method): """ Wash the login_method parameter that came from the web input form. @param login_method: Wanted login_method value as it came from the web input form. @type login_method: string @return: Washed version of login_method. If the login_method value is valid, then return it. If it is not valid, then return `Local' (the default login method). @rtype: string @warning: Beware, 'Local' is hardcoded here! """ if login_method in CFG_EXTERNAL_AUTHENTICATION: return login_method else: return 'Local' diff --git a/modules/websession/lib/webuser.py b/modules/websession/lib/webuser.py index c0a2d2e16..c2f0889e1 100644 --- a/modules/websession/lib/webuser.py +++ b/modules/websession/lib/webuser.py @@ -1,1273 +1,1345 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ This file implements all methods necessary for working with users and sessions in Invenio. Contains methods for logging/registration when a user log/register into the system, checking if it is a guest user or not. At the same time this presents all the stuff it could need with sessions managements, working with websession. It also contains Apache-related user authentication stuff. 
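A minimal, illustrative sketch of how a request handler typically uses
this module (the functions named below are defined here; the flow itself
is only an example):

    uid = getUid(req)                   # -1, 0 (guest) or a real user id
    if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1:
        return page_not_authorized(req, navmenuid='youraccount')
    user_info = collect_user_info(req)  # dict of identity and preferences
    if isGuestUser(uid):
        pass                            # e.g. propose login instead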
""" __revision__ = "$Id$" import cgi import urllib import urlparse from socket import gaierror import socket import smtplib import re import random import datetime from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS, \ CFG_ACCESS_CONTROL_LEVEL_GUESTS, \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN, \ CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS, \ CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT, \ CFG_SITE_ADMIN_EMAIL, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_SITE_NAME_INTL, \ CFG_SITE_SUPPORT_EMAIL, \ CFG_SITE_SECURE_URL, \ CFG_SITE_URL, \ CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS, \ CFG_CERN_SITE, \ CFG_INSPIRE_SITE, \ CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL, \ CFG_BIBAUTHORID_ENABLED, \ CFG_SITE_RECORD try: from invenio.session import get_session except ImportError: pass from invenio.dbquery import run_sql, OperationalError, \ serialize_via_marshal, deserialize_via_marshal from invenio.access_control_admin import acc_get_role_id, acc_get_action_roles, acc_get_action_id, acc_is_user_in_role, acc_find_possible_activities from invenio.access_control_mailcookie import mail_cookie_create_mail_activation from invenio.access_control_firerole import acc_firerole_check_user, load_role_definition from invenio.access_control_config import SUPERADMINROLE, CFG_EXTERNAL_AUTH_USING_SSO from invenio.messages import gettext_set_language, wash_languages, wash_language from invenio.mailutils import send_email from invenio.errorlib import register_exception from invenio.webgroup_dblayer import get_groups from invenio.external_authentication import InvenioWebAccessExternalAuthError from invenio.access_control_config import CFG_EXTERNAL_AUTHENTICATION, \ CFG_WEBACCESS_MSGS, CFG_WEBACCESS_WARNING_MSGS, CFG_EXTERNAL_AUTH_DEFAULT +from invenio.webuser_config import CFG_WEBUSER_USER_TABLES import invenio.template tmpl = invenio.template.load('websession') re_invalid_nickname = re.compile(""".*[,'@]+.*""") # pylint: disable=C0301 def createGuestUser(): """Create a guest user , insert into user null values in all fields createGuestUser() -> GuestUserID """ if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0: try: return run_sql("insert into user (email, note) values ('', '1')") except OperationalError: return None else: try: return run_sql("insert into user (email, note) values ('', '0')") except OperationalError: return None def page_not_authorized(req, referer='', uid='', text='', navtrail='', ln=CFG_SITE_LANG, navmenuid=""): """Show error message when user is not authorized to do something. @param referer: in case the displayed message propose a login link, this is the url to return to after logging in. If not specified it is guessed from req. @param uid: the uid of the user. If not specified it is guessed from req. @param text: the message to be displayed. If not specified it will be guessed from the context. 
""" from invenio.webpage import page _ = gettext_set_language(ln) if not referer: referer = req.unparsed_uri if not CFG_ACCESS_CONTROL_LEVEL_SITE: title = CFG_WEBACCESS_MSGS[5] if not uid: uid = getUid(req) try: res = run_sql("SELECT email FROM user WHERE id=%s AND note=1", (uid,)) if res and res[0][0]: if text: body = text else: body = "%s %s" % (CFG_WEBACCESS_WARNING_MSGS[9] % cgi.escape(res[0][0]), ("%s %s" % (CFG_WEBACCESS_MSGS[0] % urllib.quote(referer), CFG_WEBACCESS_MSGS[1]))) else: if text: body = text else: if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 1: body = CFG_WEBACCESS_MSGS[3] else: body = CFG_WEBACCESS_WARNING_MSGS[4] + CFG_WEBACCESS_MSGS[2] except OperationalError, e: body = _("Database problem") + ': ' + str(e) elif CFG_ACCESS_CONTROL_LEVEL_SITE == 1: title = CFG_WEBACCESS_MSGS[8] body = "%s %s" % (CFG_WEBACCESS_MSGS[7], CFG_WEBACCESS_MSGS[2]) elif CFG_ACCESS_CONTROL_LEVEL_SITE == 2: title = CFG_WEBACCESS_MSGS[6] body = "%s %s" % (CFG_WEBACCESS_MSGS[4], CFG_WEBACCESS_MSGS[2]) return page(title=title, language=ln, uid=getUid(req), body=body, navtrail=navtrail, req=req, navmenuid=navmenuid) def getUid(req): """Return user ID taking it from the cookie of the request. Includes control mechanism for the guest users, inserting in the database table when need be, raising the cookie back to the client. User ID is set to 0 when client refuses cookie or we are in the read-only site operation mode. User ID is set to -1 when we are in the permission denied site operation mode. getUid(req) -> userId """ if hasattr(req, '_user_info'): return req._user_info['uid'] if CFG_ACCESS_CONTROL_LEVEL_SITE == 1: return 0 if CFG_ACCESS_CONTROL_LEVEL_SITE == 2: return -1 guest = 0 try: session = get_session(req) except Exception: ## Not possible to obtain a session return 0 uid = session.get('uid', -1) if not session.need_https: if uid == -1: # first time, so create a guest user if CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS: uid = session['uid'] = createGuestUser() session.set_remember_me(False) guest = 1 else: if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0: session['uid'] = 0 session.set_remember_me(False) return 0 else: return -1 else: if not hasattr(req, '_user_info') and 'user_info' in session: req._user_info = session['user_info'] req._user_info = collect_user_info(req, refresh=True) if guest == 0: guest = isGuestUser(uid) if guest: if CFG_ACCESS_CONTROL_LEVEL_GUESTS == 0: return uid elif CFG_ACCESS_CONTROL_LEVEL_GUESTS >= 1: return -1 else: res = run_sql("SELECT note FROM user WHERE id=%s", (uid,)) if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0: return uid elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1 and res and res[0][0] in [1, "1"]: return uid else: return -1 def setUid(req, uid, remember_me=False): """It sets the userId into the session, and raise the cookie to the client. """ if hasattr(req, '_user_info'): del req._user_info session = get_session(req) session.invalidate() session = get_session(req) session['uid'] = uid if remember_me: session.set_timeout(86400) session.set_remember_me(remember_me) if uid > 0: user_info = collect_user_info(req, login_time=True) session['user_info'] = user_info req._user_info = user_info else: del session['user_info'] return uid def session_param_del(req, key): """ Remove a given key from the session. """ session = get_session(req) del session[key] def session_param_set(req, key, value): """ Associate a VALUE to the session param KEY for the current session. 
""" session = get_session(req) session[key] = value def session_param_get(req, key): """ Return session parameter value associated with session parameter KEY for the current session. If the key doesn't exists raise KeyError. """ session = get_session(req) return session[key] def session_param_list(req): """ List all available session parameters. """ session = get_session(req) return session.keys() def get_last_login(uid): """Return the last_login datetime for uid if any, otherwise return the Epoch.""" res = run_sql('SELECT last_login FROM user WHERE id=%s', (uid,), 1) if res and res[0][0]: return res[0][0] else: return datetime.datetime(1970, 1, 1) def get_user_info(uid, ln=CFG_SITE_LANG): """Get infos for a given user. @param uid: user id (int) @return: tuple: (uid, nickname, display_name) """ _ = gettext_set_language(ln) query = """SELECT id, nickname FROM user WHERE id=%s""" res = run_sql(query, (uid,)) if res: if res[0]: user = list(res[0]) if user[1]: user.append(user[1]) else: user[1] = str(user[0]) user.append(_("user") + ' #' + str(user[0])) return tuple(user) return (uid, '', _("N/A")) def get_uid_from_email(email): """Return the uid corresponding to an email. Return -1 when the email does not exists.""" try: res = run_sql("SELECT id FROM user WHERE email=%s", (email,)) if res: return res[0][0] else: return -1 except OperationalError: register_exception() return -1 def isGuestUser(uid, run_on_slave=True): """It Checks if the userId corresponds to a guestUser or not isGuestUser(uid) -> boolean """ out = 1 try: res = run_sql("SELECT email FROM user WHERE id=%s LIMIT 1", (uid,), 1, run_on_slave=run_on_slave) if res: if res[0][0]: out = 0 except OperationalError: register_exception() return out def isUserSubmitter(user_info): """Return True if the user is a submitter for something; False otherwise.""" u_email = get_email(user_info['uid']) res = run_sql("SELECT email FROM sbmSUBMISSIONS WHERE email=%s LIMIT 1", (u_email,), 1) return len(res) > 0 def isUserReferee(user_info): """Return True if the user is a referee for something; False otherwise.""" if CFG_CERN_SITE: return True else: for (role_id, role_name, role_description) in acc_get_action_roles(acc_get_action_id('referee')): if acc_is_user_in_role(user_info, role_id): return True return False def isUserAdmin(user_info): """Return True if the user has some admin rights; False otherwise.""" return acc_find_possible_activities(user_info) != {} def isUserSuperAdmin(user_info): """Return True if the user is superadmin; False otherwise.""" if run_sql("""SELECT r.id FROM accROLE r LEFT JOIN user_accROLE ur ON r.id = ur.id_accROLE WHERE r.name = %s AND ur.id_user = %s AND ur.expiration>=NOW() LIMIT 1""", (SUPERADMINROLE, user_info['uid']), 1, run_on_slave=True): return True return acc_firerole_check_user(user_info, load_role_definition(acc_get_role_id(SUPERADMINROLE))) def nickname_valid_p(nickname): """Check whether wanted NICKNAME supplied by the user is valid. At the moment we just check whether it is not empty, does not contain blanks or @, is not equal to `guest', etc. This check relies on re_invalid_nickname regexp (see above) Return 1 if nickname is okay, return 0 if it is not. """ if nickname and \ not(nickname.startswith(' ') or nickname.endswith(' ')) and \ nickname.lower() != 'guest': if not re_invalid_nickname.match(nickname): return 1 return 0 def email_valid_p(email): """Check whether wanted EMAIL address supplied by the user is valid. At the moment we just check whether it contains '@' and whether it doesn't contain blanks. 
We also check the email domain if CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN is set. Return 1 if email is okay, return 0 if it is not. """ if (email.find("@") <= 0) or (email.find(" ") > 0): return 0 elif CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN: if not email.endswith(CFG_ACCESS_CONTROL_LIMIT_REGISTRATION_TO_DOMAIN): return 0 return 1 def confirm_email(email): """Confirm the email. It returns None when there are problems, otherwise it return the uid involved.""" if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 0: activated = 1 elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1: activated = 0 elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2: return -1 run_sql('UPDATE user SET note=%s where email=%s', (activated, email)) res = run_sql('SELECT id FROM user where email=%s', (email,)) if res: if CFG_ACCESS_CONTROL_NOTIFY_ADMIN_ABOUT_NEW_ACCOUNTS: send_new_admin_account_warning(email, CFG_SITE_ADMIN_EMAIL) return res[0][0] else: return None def registerUser(req, email, passw, nickname, register_without_nickname=False, login_method=None, ln=CFG_SITE_LANG): """Register user with the desired values of NICKNAME, EMAIL and PASSW. If REGISTER_WITHOUT_NICKNAME is set to True, then ignore desired NICKNAME and do not set any. This is suitable for external authentications so that people can login without having to register an internal account first. Return 0 if the registration is successful, 1 if email is not valid, 2 if nickname is not valid, 3 if email is already in the database, 4 if nickname is already in the database, 5 when users cannot register themselves because of the site policy, 6 when the site is having problem contacting the user. If login_method is None or is equal to the key corresponding to local authentication, then CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS is taken in account for deciding the behaviour about registering. """ # is email valid? email = email.lower() if not email_valid_p(email): return 1 _ = gettext_set_language(ln) # is email already taken? res = run_sql("SELECT email FROM user WHERE email=%s", (email,)) if len(res) > 0: return 3 if register_without_nickname: # ignore desired nick and use default empty string one: nickname = "" else: # is nickname valid? if not nickname_valid_p(nickname): return 2 # is nickname already taken? 
res = run_sql("SELECT nickname FROM user WHERE nickname=%s", (nickname,)) if len(res) > 0: return 4 activated = 1 # By default activated if not login_method or not CFG_EXTERNAL_AUTHENTICATION[login_method]: # local login if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 2: return 5 elif CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT: activated = 2 # Email confirmation required elif CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 1: activated = 0 # Administrator confirmation required if CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT: address_activation_key = mail_cookie_create_mail_activation(email) ip_address = req.remote_host or req.remote_ip try: if not send_email(CFG_SITE_SUPPORT_EMAIL, email, _("Account registration at %s") % CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME), tmpl.tmpl_account_address_activation_email_body(email, address_activation_key, ip_address, ln)): return 1 except (smtplib.SMTPException, socket.error): return 6 # okay, go on and register the user: user_preference = get_default_user_preferences() uid = run_sql("INSERT INTO user (nickname, email, password, note, settings, last_login) " "VALUES (%s,%s,AES_ENCRYPT(email,%s),%s,%s, NOW())", (nickname, email, passw, activated, serialize_via_marshal(user_preference))) if activated == 1: # Ok we consider the user as logged in :-) setUid(req, uid) return 0 def updateDataUser(uid, email, nickname): """ Update user data. Used when a user changed his email or password or nickname. """ email = email.lower() if email == 'guest': return 0 if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 2: run_sql("update user set email=%s where id=%s", (email, uid)) if nickname and nickname != '': run_sql("update user set nickname=%s where id=%s", (nickname, uid)) return 1 def updatePasswordUser(uid, password): """Update the password of a user.""" if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS < 3: run_sql("update user set password=AES_ENCRYPT(email,%s) where id=%s", (password, uid)) return 1 +def merge_usera_into_userb(id_usera, id_userb): + """ + Merges all the information of usera into userb. + Deletes afterwards any reference to usera. + The information about SQL tables is contained in the CFG_WEBUSER_USER_TABLES + variable. + """ + preferencea = get_user_preferences(id_usera) + preferenceb = get_user_preferences(id_userb) + preferencea.update(preferenceb) + set_user_preferences(id_userb, preferencea) + try: + for table, dummy in CFG_WEBUSER_USER_TABLES: + run_sql("LOCK TABLE %s WRITE" % table) + index = 0 + try: + for index, (table, column) in enumerate(CFG_WEBUSER_USER_TABLES): + run_sql("UPDATE %(table)s SET %(column)s=%%s WHERE %(column)s=%%s; DELETE FROM %(table)s WHERE %(column)s=%%s;" % { + 'table': table, + 'column': column + }, (id_userb, id_usera, id_usera)) + except Exception, err: + msg = "Error when merging id_user=%s into id_userb=%s for table %s: %s\n" + msg += "users where succesfully already merged for tables: %s\n" % ', '.join(CFG_WEBUSER_USER_TABLES[:index]) + msg += "users where not succesfully already merged for tables: %s\n" % ', '.join(CFG_WEBUSER_USER_TABLES[index:]) + register_exception(alert_admin=True, prefix=msg) + raise + finally: + run_sql("UNLOCK TABLES") + def loginUser(req, p_un, p_pw, login_method): """It is a first simple version for the authentication of user. 
def loginUser(req, p_un, p_pw, login_method):
    """It is a first simple version for the authentication of users.
    It returns the id of the user, for checking afterwards if the login
    is correct.
    """

    # p_un passed may be an email or a nickname:
    p_email = get_email_from_username(p_un)

    # go on with the old stuff based on p_email:

    if not login_method in CFG_EXTERNAL_AUTHENTICATION:
        return ([], p_email, p_pw, 12)

-    if CFG_EXTERNAL_AUTHENTICATION[login_method]: # External Authenthication
+    if CFG_EXTERNAL_AUTHENTICATION[login_method]: # External Authentication
        try:
-            p_email = CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(p_email, p_pw, req) or CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(p_un, p_pw, req) ## We try to login with either the email of the nickname
+            result = CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(p_email, p_pw, req)
+            if result == (None, None) or result is None:
+                result = CFG_EXTERNAL_AUTHENTICATION[login_method].auth_user(p_un, p_pw, req) ## We try to login with either the email or the nickname
+            if isinstance(result, (tuple, list)) and len(result) == 2:
+                p_email, p_extid = result
+            else:
+                ## For backward compatibility we use the email as external
+                ## identifier if it was not returned already by the plugin
+                p_email, p_extid = str(result), str(result)
            if p_email:
                p_email = p_email.lower()
+                if not p_extid:
+                    p_extid = p_email
            else:
                return([], p_email, p_pw, 15)
        except InvenioWebAccessExternalAuthError:
            register_exception(req=req, alert_admin=True)
            raise
        if p_email: # Authenticated externally
-            query_result = run_sql("SELECT id from user where email=%s", (p_email,))
-            if not query_result: # First time user
-                p_pw_local = int(random.random() * 1000000)
-                p_nickname = ''
-                if CFG_EXTERNAL_AUTHENTICATION[login_method].enforce_external_nicknames:
-                    try: # Let's discover the external nickname!
-                        p_nickname = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_nickname(p_email, p_pw, req)
-                    except (AttributeError, NotImplementedError):
-                        pass
-                    except:
-                        register_exception(req=req, alert_admin=True)
-                        raise
-                res = registerUser(req, p_email, p_pw_local, p_nickname,
-                        register_without_nickname=p_nickname == '',
-                        login_method=login_method)
-                if res == 4 or res == 2: # The nickname was already taken
-                    res = registerUser(req, p_email, p_pw_local, '',
-                        register_without_nickname=True,
-                        login_method=login_method)
-                    query_result = run_sql("SELECT id from user where email=%s", (p_email,))
-                elif res == 0: # Everything was ok, with or without nickname.
-                    query_result = run_sql("SELECT id from user where email=%s", (p_email,))
-                elif res == 6: # error in contacting the user via email
-                    return([], p_email, p_pw_local, 19)
+            res = run_sql("SELECT id_user FROM userEXT WHERE id=%s and method=%s", (p_extid, login_method))
+            if res:
+                ## User was already registered with this external method.
+                id_user = res[0][0]
+                old_email = run_sql("SELECT email FROM user WHERE id=%s", (id_user,))[0][0]
+                if old_email != p_email:
+                    ## User has changed email of reference.
+                    res = run_sql("SELECT id FROM user WHERE email=%s", (p_email,))
+                    if res:
+                        ## User was also registered with the other email.
+                        ## We should merge the two!
+                        new_id = res[0][0]
+                        merge_usera_into_userb(id_user, new_id)
+                        run_sql("DELETE FROM user WHERE id=%s", (id_user, ))
+                        id_user = new_id
+                    else:
+                        ## We just need to rename the email address of the
+                        ## corresponding user.  Unfortunately the local
+                        ## password will then be invalid, but it's unlikely
+                        ## that the user is using both an external and a
+                        ## local account.
+ run_sql("UPDATE user SET email=%s WHERE id=%s", (p_email, id_user)) + else: + ## User was not already registered with this external method. + res = run_sql("SELECT id FROM user WHERE email=%s", (p_email, )) + if res: + ## The user was already known with this email + id_user = res[0][0] + ## We fix the inconsistence in the userEXT table. + run_sql("INSERT INTO userEXT(id, method, id_user) VALUES(%s, %s, %s) ON DUPLICATE KEY UPDATE id=%s, method=%s, id_user=%s", (p_extid, login_method, id_user, p_extid, login_method, id_user)) else: - return([], p_email, p_pw_local, 13) - elif CFG_EXTERNAL_AUTHENTICATION[login_method].enforce_external_nicknames: + ## First time user + p_pw_local = int(random.random() * 1000000) + p_nickname = '' + if CFG_EXTERNAL_AUTHENTICATION[login_method].enforce_external_nicknames: + try: # Let's discover the external nickname! + p_nickname = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_nickname(p_email, p_pw, req) + except (AttributeError, NotImplementedError): + pass + except: + register_exception(req=req, alert_admin=True) + raise + res = registerUser(req, p_email, p_pw_local, p_nickname, + register_without_nickname=p_nickname == '', + login_method=login_method) + if res == 4 or res == 2: # The nickname was already taken + res = registerUser(req, p_email, p_pw_local, '', + register_without_nickname=True, + login_method=login_method) + id_user = run_sql("SELECT id from user where email=%s", (p_email,))[0][0] + elif res == 0: # Everything was ok, with or without nickname. + id_user = run_sql("SELECT id from user where email=%s", (p_email,))[0][0] + elif res == 6: # error in contacting the user via email + return([], p_email, p_pw_local, 19) + else: + return([], p_email, p_pw_local, 13) + run_sql("INSERT INTO userEXT(id, method, id_user) VALUES(%s, %s, %s)", (p_extid, login_method, id_user)) + if CFG_EXTERNAL_AUTHENTICATION[login_method].enforce_external_nicknames: ## Let's still fetch a possibly upgraded nickname. try: # Let's discover the external nickname! p_nickname = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_nickname(p_email, p_pw, req) if nickname_valid_p(p_nickname) and nicknameUnique(p_nickname) == 0: - updateDataUser(query_result[0][0], p_email, p_nickname) + updateDataUser(id_user, p_email, p_nickname) except (AttributeError, NotImplementedError): pass except: register_exception(alert_admin=True) raise try: groups = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_groups_membership(p_email, p_pw, req) # groups is a dictionary {group_name : group_description,} new_groups = {} for key, value in groups.items(): new_groups[key + " [" + str(login_method) + "]"] = value groups = new_groups except (AttributeError, NotImplementedError): pass except: register_exception(req=req, alert_admin=True) return([], p_email, p_pw, 16) else: # Groups synchronization if groups: - userid = query_result[0][0] from invenio.webgroup import synchronize_external_groups - synchronize_external_groups(userid, groups, login_method) + synchronize_external_groups(id_user, groups, login_method) - user_prefs = get_user_preferences(query_result[0][0]) + user_prefs = get_user_preferences(id_user) if not CFG_EXTERNAL_AUTHENTICATION[login_method]: ## I.e. 
            if not CFG_EXTERNAL_AUTHENTICATION[login_method]: ## I.e. if the login method is not of robot type:
                if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS >= 4: # Let's prevent the user from switching login_method
                    if user_prefs.has_key("login_method") and \
                            user_prefs["login_method"] != login_method:
                        return([], p_email, p_pw, 11)
                user_prefs["login_method"] = login_method

                # Cleaning external settings
                for key in user_prefs.keys():
                    if key.startswith('EXTERNAL_'):
                        del user_prefs[key]

                try:
                    # Importing external settings
                    new_prefs = CFG_EXTERNAL_AUTHENTICATION[login_method].fetch_user_preferences(p_email, p_pw, req)
                    for key, value in new_prefs.items():
                        user_prefs['EXTERNAL_' + key] = value
                except (AttributeError, NotImplementedError):
                    pass
                except InvenioWebAccessExternalAuthError:
                    register_exception(req=req, alert_admin=True)
                    return([], p_email, p_pw, 16)

                # Storing settings
-                set_user_preferences(query_result[0][0], user_prefs)
+                set_user_preferences(id_user, user_prefs)
        else:
            return ([], p_un, p_pw, 10)
    else: # Internal Authentication
        if not p_pw:
            p_pw = ''
        query_result = run_sql("SELECT id,email,note from user where email=%s and password=AES_ENCRYPT(email,%s)", (p_email, p_pw,))
        if query_result:
            #FIXME drop external groups and settings
            note = query_result[0][2]
+            id_user = query_result[0][0]
            if note == '1': # Good account
                preferred_login_method = get_user_preferences(query_result[0][0])['login_method']
                p_email = query_result[0][1].lower()
                if login_method != preferred_login_method:
                    if preferred_login_method in CFG_EXTERNAL_AUTHENTICATION:
                        return ([], p_email, p_pw, 11)
            elif note == '2': # Email address needs to be confirmed by user
                return ([], p_email, p_pw, 17)
            elif note == '0': # Account needs to be confirmed by administrator
                return ([], p_email, p_pw, 18)
        else:
            return ([], p_email, p_pw, 14)

    # Login successful! Updating the last access time
    run_sql("UPDATE user SET last_login=NOW() WHERE email=%s", (p_email,))
    return (query_result, p_email, p_pw, 0)

def drop_external_settings(userId):
    """Drop the external (EXTERNAL_) settings of userid."""
    prefs = get_user_preferences(userId)
    for key in prefs.keys():
        if key.startswith('EXTERNAL_'):
            del prefs[key]
    set_user_preferences(userId, prefs)

def logoutUser(req):
    """Log the user out of the system, switching the session back to a
    guest user.
    """
    session = get_session(req)
    if CFG_WEBSESSION_DIFFERENTIATE_BETWEEN_GUESTS:
        uid = createGuestUser()
        session['uid'] = uid
        session.set_remember_me(False)
    else:
        uid = 0
        session.invalidate()
    if hasattr(req, '_user_info'):
        delattr(req, '_user_info')
    return uid

def username_exists_p(username):
    """Check if USERNAME exists in the system.  Username may be either
    nickname or email.

    Return 1 if it does exist, 0 if it does not.
    """
    if username == "":
        # guest users do not count as existing usernames
        return 0
    res = run_sql("SELECT email FROM user WHERE email=%s", (username,)) + \
          run_sql("SELECT email FROM user WHERE nickname=%s", (username,))
    if len(res) > 0:
        return 1
    return 0

def emailUnique(p_email):
    """Check whether the email address exists exactly once.
    Return the uid of the only account using P_EMAIL, 0 when no
    account does, and -1 when several accounts do.
    """
    query_result = run_sql("select id, email from user where email=%s", (p_email,))
    if len(query_result) == 1:
        return query_result[0][0]
    elif len(query_result) == 0:
        return 0
    return -1
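
## Illustrative sketch (hypothetical helper): dispatching on the
## three-way convention shared by emailUnique() and nicknameUnique():
## a uid for exactly one match, 0 for none, -1 for duplicates.
def _example_uid_for_unique_email(p_email):
    """Return the matching uid, or None when the address is unknown
    or ambiguous (sketch only)."""
    uid = emailUnique(p_email)
    if uid in (0, -1):
        return None
    return uid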
def nicknameUnique(p_nickname):
    """Check whether the nickname exists exactly once.
    Return the uid of the only account using P_NICKNAME, 0 when no
    account does, and -1 when several accounts do.
    """
    query_result = run_sql("select id, nickname from user where nickname=%s", (p_nickname,))
    if len(query_result) == 1:
        return query_result[0][0]
    elif len(query_result) == 0:
        return 0
    return -1

def update_Uid(req, p_email, remember_me=False):
    """Update the userId of the session.  It is used when a guest user
    logs in successfully with a given email and password.  As a side
    effect it will discover all the restricted collections to which the
    user has access.
    """
    query_ID = int(run_sql("select id from user where email=%s",
                           (p_email,))[0][0])
    setUid(req, query_ID, remember_me)
    return query_ID

def send_new_admin_account_warning(new_account_email, send_to, ln=CFG_SITE_LANG):
    """Send an email to the address given by send_to about the new account
    new_account_email."""
    _ = gettext_set_language(ln)
    sub = _("New account on") + " '%s'" % CFG_SITE_NAME
    if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
        sub += " - " + _("PLEASE ACTIVATE")
    body = _("A new account has been created on") + " '%s'" % CFG_SITE_NAME
    if CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS == 1:
        body += _(" and is awaiting activation")
    body += ":\n\n"
    body += _(" Username/Email") + ": %s\n\n" % new_account_email
    body += _("You can approve or reject this account request at") + ": %s/admin/webaccess/webaccessadmin.py/manageaccounts\n" % CFG_SITE_URL
    return send_email(CFG_SITE_SUPPORT_EMAIL, send_to, subject=sub, content=body)

def get_email(uid):
    """Return email address of the user uid.  Return string 'guest' in case
    the user is not found."""
    out = "guest"
    res = run_sql("SELECT email FROM user WHERE id=%s", (uid,), 1)
    if res and res[0][0]:
        out = res[0][0].lower()
    return out

def get_email_from_username(username):
    """Return email address of the user corresponding to USERNAME.
    The username may be either nickname or email.  Return USERNAME
    untouched if it is not found in the database or if several matching
    entries are found.
    """
    if username == '':
        return ''
    out = username
    res = run_sql("SELECT email FROM user WHERE email=%s", (username,), 1) + \
          run_sql("SELECT email FROM user WHERE nickname=%s", (username,), 1)
    if res and len(res) == 1:
        out = res[0][0].lower()
    return out

#def get_password(uid):
    #"""Return password of the user uid.  Return None in case
    #the user is not found."""
    #out = None
    #res = run_sql("SELECT password FROM user WHERE id=%s", (uid,), 1)
    #if res and res[0][0] != None:
        #out = res[0][0]
    #return out

def get_nickname(uid):
    """Return nickname of the user uid.  Return None in case
    the user is not found."""
    out = None
    res = run_sql("SELECT nickname FROM user WHERE id=%s", (uid,), 1)
    if res and res[0][0]:
        out = res[0][0]
    return out

def get_nickname_or_email(uid):
    """Return nickname (preferred) or the email address of the user uid.
Return string 'guest' in case the user is not found.""" out = "guest" res = run_sql("SELECT nickname, email FROM user WHERE id=%s", (uid,), 1) if res and res[0]: if res[0][0]: out = res[0][0] elif res[0][1]: out = res[0][1].lower() return out def create_userinfobox_body(req, uid, language="en"): """Create user info box body for user UID in language LANGUAGE.""" if req: if req.subprocess_env.has_key('HTTPS') \ and req.subprocess_env['HTTPS'] == 'on': url_referer = CFG_SITE_SECURE_URL + req.unparsed_uri else: url_referer = CFG_SITE_URL + req.unparsed_uri if '/youraccount/logout' in url_referer: url_referer = '' else: url_referer = CFG_SITE_URL user_info = collect_user_info(req) try: return tmpl.tmpl_create_userinfobox(ln=language, url_referer=url_referer, guest=int(user_info['guest']), username=get_nickname_or_email(uid), submitter=user_info['precached_viewsubmissions'], referee=user_info['precached_useapprove'], admin=user_info['precached_useadmin'], usebaskets=user_info['precached_usebaskets'], usemessages=user_info['precached_usemessages'], usealerts=user_info['precached_usealerts'], usegroups=user_info['precached_usegroups'], useloans=user_info['precached_useloans'], usestats=user_info['precached_usestats'] ) except OperationalError: return "" def create_useractivities_menu(req, uid, navmenuid, ln="en"): """Create user activities menu. @param req: request object @param uid: user id @type uid: int @param navmenuid: the section of the website this page belongs (search, submit, baskets, etc.) @type navmenuid: string @param ln: language @type ln: string @return: HTML menu of the user activities @rtype: string """ if req: if req.subprocess_env.has_key('HTTPS') \ and req.subprocess_env['HTTPS'] == 'on': url_referer = CFG_SITE_SECURE_URL + req.unparsed_uri else: url_referer = CFG_SITE_URL + req.unparsed_uri if '/youraccount/logout' in url_referer: url_referer = '' else: url_referer = CFG_SITE_URL user_info = collect_user_info(req) is_user_menu_selected = False if navmenuid == 'personalize' or \ navmenuid.startswith('your') and \ navmenuid != 'youraccount': is_user_menu_selected = True try: return tmpl.tmpl_create_useractivities_menu( ln=ln, selected=is_user_menu_selected, url_referer=url_referer, guest=int(user_info['guest']), username=get_nickname_or_email(uid), submitter=user_info['precached_viewsubmissions'], referee=user_info['precached_useapprove'], admin=user_info['precached_useadmin'], usebaskets=user_info['precached_usebaskets'], usemessages=user_info['precached_usemessages'], usealerts=user_info['precached_usealerts'], usegroups=user_info['precached_usegroups'], useloans=user_info['precached_useloans'], usestats=user_info['precached_usestats'] ) except OperationalError: return "" def create_adminactivities_menu(req, uid, navmenuid, ln="en"): """Create admin activities menu. @param req: request object @param uid: user id @type uid: int @param navmenuid: the section of the website this page belongs (search, submit, baskets, etc.) 
@type navmenuid: string @param ln: language @type ln: string @return: HTML menu of the user activities @rtype: string """ _ = gettext_set_language(ln) if req: if req.subprocess_env.has_key('HTTPS') \ and req.subprocess_env['HTTPS'] == 'on': url_referer = CFG_SITE_SECURE_URL + req.unparsed_uri else: url_referer = CFG_SITE_URL + req.unparsed_uri if '/youraccount/logout' in url_referer: url_referer = '' else: url_referer = CFG_SITE_URL user_info = collect_user_info(req) activities = acc_find_possible_activities(user_info, ln) # For BibEdit and BibDocFile menu items, take into consideration # current record whenever possible if activities.has_key(_("Run Record Editor")) or \ activities.has_key(_("Run Document File Manager")) and \ user_info['uri'].startswith('/' + CFG_SITE_RECORD + '/'): try: # Get record ID and try to cast it to an int current_record_id = int(urlparse.urlparse(user_info['uri'])[2].split('/')[2]) except: pass else: if activities.has_key(_("Run Record Editor")): activities[_("Run Record Editor")] = activities[_("Run Record Editor")] + '&#state=edit&recid=' + str(current_record_id) if activities.has_key(_("Run Document File Manager")): activities[_("Run Document File Manager")] = activities[_("Run Document File Manager")] + '&recid=' + str(current_record_id) try: return tmpl.tmpl_create_adminactivities_menu( ln=ln, selected=navmenuid == 'admin', url_referer=url_referer, guest=int(user_info['guest']), username=get_nickname_or_email(uid), submitter=user_info['precached_viewsubmissions'], referee=user_info['precached_useapprove'], admin=user_info['precached_useadmin'], usebaskets=user_info['precached_usebaskets'], usemessages=user_info['precached_usemessages'], usealerts=user_info['precached_usealerts'], usegroups=user_info['precached_usegroups'], useloans=user_info['precached_useloans'], usestats=user_info['precached_usestats'], activities=activities ) except OperationalError: return "" def list_registered_users(): """List all registered users.""" return run_sql("SELECT id,email FROM user where email!=''") def list_users_in_role(role): """List all users of a given role (see table accROLE) @param role: role of user (string) @return: list of uids """ res = run_sql("""SELECT uacc.id_user FROM user_accROLE uacc JOIN accROLE acc ON uacc.id_accROLE=acc.id WHERE acc.name=%s""", (role,), run_on_slave=True) if res: return map(lambda x: int(x[0]), res) return [] def list_users_in_roles(role_list): """List all users of given roles (see table accROLE) @param role_list: list of roles [string] @return: list of uids """ if not(type(role_list) is list or type(role_list) is tuple): role_list = [role_list] query = """SELECT DISTINCT(uacc.id_user) FROM user_accROLE uacc JOIN accROLE acc ON uacc.id_accROLE=acc.id """ query_addons = "" query_params = () if len(role_list) > 0: query_params = role_list query_addons = " WHERE " for role in role_list[:-1]: query_addons += "acc.name=%s OR " query_addons += "acc.name=%s" res = run_sql(query + query_addons, query_params, run_on_slave=True) if res: return map(lambda x: int(x[0]), res) return [] def get_uid_based_on_pref(prefname, prefvalue): """get the user's UID based where his/her preference prefname has value prefvalue in preferences""" prefs = run_sql("SELECT id, settings FROM user WHERE settings is not NULL") the_uid = None for pref in prefs: try: settings = deserialize_via_marshal(pref[1]) if (settings.has_key(prefname)) and (settings[prefname] == prefvalue): the_uid = pref[0] except: pass return the_uid def get_user_preferences(uid): pref = 
run_sql("SELECT id, settings FROM user WHERE id=%s", (uid,)) if pref: try: return deserialize_via_marshal(pref[0][1]) except: pass return get_default_user_preferences() # empty dict mean no preferences def set_user_preferences(uid, pref): assert(type(pref) == type({})) run_sql("UPDATE user SET settings=%s WHERE id=%s", (serialize_via_marshal(pref), uid)) def get_default_user_preferences(): user_preference = { 'login_method': ''} if CFG_EXTERNAL_AUTH_DEFAULT in CFG_EXTERNAL_AUTHENTICATION: user_preference['login_method'] = CFG_EXTERNAL_AUTH_DEFAULT return user_preference def get_preferred_user_language(req): def _get_language_from_req_header(accept_language_header): """Extract langs info from req.headers_in['Accept-Language'] which should be set to something similar to: 'fr,en-us;q=0.7,en;q=0.3' """ tmp_langs = {} for lang in accept_language_header.split(','): lang = lang.split(';q=') if len(lang) == 2: lang[1] = lang[1].replace('"', '') # Hack for Yeti robot try: tmp_langs[float(lang[1])] = lang[0] except ValueError: pass else: tmp_langs[1.0] = lang[0] ret = [] priorities = tmp_langs.keys() priorities.sort() priorities.reverse() for priority in priorities: ret.append(tmp_langs[priority]) return ret uid = getUid(req) guest = isGuestUser(uid) new_lang = None preferred_lang = None if not guest: user_preferences = get_user_preferences(uid) preferred_lang = new_lang = user_preferences.get('language', None) if not new_lang: try: new_lang = wash_languages(cgi.parse_qs(req.args)['ln']) except (TypeError, AttributeError, KeyError): pass if not new_lang: try: new_lang = wash_languages(_get_language_from_req_header(req.headers_in['Accept-Language'])) except (TypeError, AttributeError, KeyError): pass new_lang = wash_language(new_lang) if new_lang != preferred_lang and not guest: user_preferences['language'] = new_lang set_user_preferences(uid, user_preferences) return new_lang def collect_user_info(req, login_time=False, refresh=False): """Given the mod_python request object rec or a uid it returns a dictionary containing at least the keys uid, nickname, email, groups, plus any external keys in the user preferences (collected at login time and built by the different external authentication plugins) and if the mod_python request object is provided, also the remote_ip, remote_host, referer, agent fields. NOTE: if req is a mod_python request object, the user_info dictionary is saved into req._user_info (for caching purpouses) setApacheUser & setUid will properly reset it. 
""" from invenio.search_engine import get_permitted_restricted_collections user_info = { 'remote_ip' : '', 'remote_host' : '', 'referer' : '', 'uri' : '', 'agent' : '', 'uid' :-1, 'nickname' : '', 'email' : '', 'group' : [], 'guest' : '1', 'session' : None, 'precached_permitted_restricted_collections' : [], 'precached_usebaskets' : False, 'precached_useloans' : False, 'precached_usegroups' : False, 'precached_usealerts' : False, 'precached_usemessages' : False, 'precached_viewsubmissions' : False, 'precached_useapprove' : False, 'precached_useadmin' : False, 'precached_usestats' : False, 'precached_viewclaimlink' : False, 'precached_usepaperclaim' : False, 'precached_usepaperattribution' : False, 'precached_canseehiddenmarctags' : False, } try: is_req = False if not req: uid = -1 elif type(req) in (type(1), type(1L)): ## req is infact a user identification uid = req elif type(req) is dict: ## req is by mistake already a user_info try: assert(req.has_key('uid')) assert(req.has_key('email')) assert(req.has_key('nickname')) except AssertionError: ## mmh... misuse of collect_user_info. Better warn the admin! register_exception(alert_admin=True) user_info.update(req) return user_info else: is_req = True uid = getUid(req) if hasattr(req, '_user_info') and not login_time: user_info = req._user_info if not refresh: return req._user_info req._user_info = user_info try: user_info['remote_ip'] = req.remote_ip except gaierror: #FIXME: we should support IPV6 too. (hint for FireRole) pass user_info['session'] = get_session(req).sid() user_info['remote_host'] = req.remote_host or '' user_info['referer'] = req.headers_in.get('Referer', '') user_info['uri'] = req.unparsed_uri or () user_info['agent'] = req.headers_in.get('User-Agent', 'N/A') user_info['uid'] = uid user_info['nickname'] = get_nickname(uid) or '' user_info['email'] = get_email(uid) or '' user_info['group'] = [] user_info['guest'] = str(isGuestUser(uid)) if user_info['guest'] == '1' and CFG_INSPIRE_SITE: usepaperattribution = False viewclaimlink = False if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionviewers"))): usepaperattribution = True # if (CFG_BIBAUTHORID_ENABLED # and usepaperattribution # and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionlinkviewers"))): # viewclaimlink = True if is_req: session = get_session(req) viewlink = False try: viewlink = session['personinfo']['claim_in_process'] except (KeyError, TypeError): viewlink = False else: viewlink = False if (CFG_BIBAUTHORID_ENABLED and usepaperattribution and viewlink): viewclaimlink = True user_info['precached_viewclaimlink'] = viewclaimlink user_info['precached_usepaperattribution'] = usepaperattribution if user_info['guest'] == '0': user_info['group'] = [group[1] for group in get_groups(uid)] prefs = get_user_preferences(uid) login_method = prefs['login_method'] login_object = CFG_EXTERNAL_AUTHENTICATION[login_method] if login_object and ((datetime.datetime.now() - get_last_login(uid)).seconds > 3600): ## The user uses an external authentication method and it's a bit since ## she has not performed a login if not CFG_EXTERNAL_AUTH_USING_SSO or ( is_req and login_object.in_shibboleth(req)): ## If we're using SSO we must be sure to be in HTTPS and Shibboleth handler ## otherwise we can't really read anything, hence ## it's better skip the synchronization try: groups = login_object.fetch_user_groups_membership(user_info['email'], req=req) # groups is a dictionary {group_name : group_description,} new_groups = {} for 
key, value in groups.items(): new_groups[key + " [" + str(login_method) + "]"] = value groups = new_groups except (AttributeError, NotImplementedError, TypeError, InvenioWebAccessExternalAuthError): pass else: # Groups synchronization from invenio.webgroup import synchronize_external_groups synchronize_external_groups(uid, groups, login_method) user_info['group'] = [group[1] for group in get_groups(uid)] try: # Importing external settings new_prefs = login_object.fetch_user_preferences(user_info['email'], req=req) for key, value in new_prefs.items(): prefs['EXTERNAL_' + key] = value except (AttributeError, NotImplementedError, TypeError, InvenioWebAccessExternalAuthError): pass else: set_user_preferences(uid, prefs) prefs = get_user_preferences(uid) run_sql('UPDATE user SET last_login=NOW() WHERE id=%s', (uid,)) if prefs: for key, value in prefs.iteritems(): user_info[key.lower()] = value if login_time: ## Heavy computational information from invenio.access_control_engine import acc_authorize_action if CFG_WEBSEARCH_PERMITTED_RESTRICTED_COLLECTIONS_LEVEL > 0: user_info['precached_permitted_restricted_collections'] = get_permitted_restricted_collections(user_info) user_info['precached_usebaskets'] = acc_authorize_action(user_info, 'usebaskets')[0] == 0 user_info['precached_useloans'] = acc_authorize_action(user_info, 'useloans')[0] == 0 user_info['precached_usegroups'] = acc_authorize_action(user_info, 'usegroups')[0] == 0 user_info['precached_usealerts'] = acc_authorize_action(user_info, 'usealerts')[0] == 0 user_info['precached_usemessages'] = acc_authorize_action(user_info, 'usemessages')[0] == 0 user_info['precached_usestats'] = acc_authorize_action(user_info, 'runwebstatadmin')[0] == 0 user_info['precached_viewsubmissions'] = isUserSubmitter(user_info) user_info['precached_useapprove'] = isUserReferee(user_info) user_info['precached_useadmin'] = isUserAdmin(user_info) user_info['precached_canseehiddenmarctags'] = acc_authorize_action(user_info, 'runbibedit')[0] == 0 usepaperclaim = False usepaperattribution = False viewclaimlink = False if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(user_info, acc_get_role_id("paperclaimviewers"))): usepaperclaim = True if (CFG_BIBAUTHORID_ENABLED and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionviewers"))): usepaperattribution = True if is_req: session = get_session(req) viewlink = False try: viewlink = session['personinfo']['claim_in_process'] except (KeyError, TypeError): viewlink = False else: viewlink = False if (CFG_BIBAUTHORID_ENABLED and usepaperattribution and viewlink): viewclaimlink = True # if (CFG_BIBAUTHORID_ENABLED # and ((usepaperclaim or usepaperattribution) # and acc_is_user_in_role(user_info, acc_get_role_id("paperattributionlinkviewers")))): # viewclaimlink = True user_info['precached_viewclaimlink'] = viewclaimlink user_info['precached_usepaperclaim'] = usepaperclaim user_info['precached_usepaperattribution'] = usepaperattribution except Exception, e: register_exception() return user_info diff --git a/modules/websession/lib/webuser_config.py b/modules/websession/lib/webuser_config.py new file mode 100644 index 000000000..f50b09ed0 --- /dev/null +++ b/modules/websession/lib/webuser_config.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +## +## This file is part of Invenio. +## Copyright (C) 2011 CERN. 
+## +## Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +""" +webuser_config.py - magic constants for webuser module. +""" + +## Used by merge_usera_into_userb, contains the list of which SQL tables refer +## to the external key id_user, and what column contains this information. +CFG_WEBUSER_USER_TABLES = ( + ("rnkPAGEVIEWS", "id_user"), + ("rnkDOWNLOADS", "id_user"), + ("session", "uid"), + ("user_usergroup", "id_user"), + ("user_accROLE", "id_user"), + ("user_query", "id_user"), + ("user_query_basket", "id_user"), + ("bskREC", "id_user_who_added_item"), + ("user_bskBASKET", "id_user"), + ("bskRECORDCOMMENT", "id_user"), + ("msgMESSAGE", "id_user_from"), + ("user_msgMESSAGE", "id_user_to"), + ("cmtRECORDCOMMENT", "id_user"), + ("cmtACTIONHISTORY", "id_user"), + ("cmtSUBSCRIPTION", "id_user"), + ("user_expJOB", "id_user"), + ("swrCLIENTDATA", "id_user"), + ("sbmCOOKIES", "uid"), + ("userEXT", "id_user"), + ("aidUSERINPUTLOG", "userid"), +) diff --git a/modules/webstat/etc/webstat.cfg b/modules/webstat/etc/webstat.cfg index 213597950..87d975bc7 100644 --- a/modules/webstat/etc/webstat.cfg +++ b/modules/webstat/etc/webstat.cfg @@ -1,67 +1,74 @@ # WebStat config file # if you modify this file run: # $ ./bin/webstatadmin --load-config [general] visitors_box = True search_box = True record_box = True bibsched_box = True basket_box = True alert_box = True loan_box = True apache_box = True uptime_box = True [webstat_custom_event_1] name = baskets param1 = action param2 = basket param3 = user [webstat_custom_event_2] name = alerts param1 = action param2 = alert param3 = user [webstat_custom_event_3] name = journals param1 = action param2 = journal_name param3 = issue_number param4 = category param5 = language param6 = articleid [webstat_custom_event_4] name = websubmissions param1 = doctype [webstat_custom_event_5] name = loanrequest param1 = request_id param2 = loan_id [webstat_custom_event_6] name = login param1 = IP param2 = UID param3 = email +[webstat_custom_event_7] +name = apikeyusage +param1 = user_id +param2 = key_id +param3 = path +param4 = query + [apache_log_analyzer] profile = nil nb-histogram-items-to-print = 20 exclude-ip-list = ("137.138.249.162" "137.138.246.86") home-collection = "Atlantis Institute of Fictive Science" search-interface-url = "/collection/" search-interface-url-old-style = "/?" detailed-record-url = "/record/" search-engine-url = "/search?" search-engine-url-old-style = "/search.py?" 
basket-url = "/yourbaskets/" add-to-basket-url = "/yourbaskets/add" display-basket-url = "/yourbaskets/display" display-public-basket-url = "/yourbaskets/display_public" alert-url = "/youralerts/" display-your-alerts-url = "/youralerts/list" display-your-searches-url = "/youralerts/display" diff --git a/modules/webstyle/lib/webinterface_handler.py b/modules/webstyle/lib/webinterface_handler.py index a7c751afd..374873973 100644 --- a/modules/webstyle/lib/webinterface_handler.py +++ b/modules/webstyle/lib/webinterface_handler.py @@ -1,540 +1,552 @@ # -*- coding: utf-8 -*- - +## ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Apache request handler mechanism. It gives the tools to map url to functions, handles the legacy url scheme (/search.py queries), HTTP/HTTPS switching, language specification,... """ __revision__ = "$Id$" ## Import the remote debugger as a first thing, if allowed try: import invenio.remote_debugger as remote_debugger except: remote_debugger = None import urlparse import cgi import sys import re import os import gc from invenio import webinterface_handler_config as apache from invenio.config import CFG_SITE_URL, CFG_SITE_SECURE_URL, CFG_TMPDIR, \ CFG_SITE_RECORD, CFG_ACCESS_CONTROL_LEVEL_SITE from invenio.messages import wash_language from invenio.urlutils import redirect_to_url from invenio.errorlib import register_exception from invenio.webuser import get_preferred_user_language, isGuestUser, \ - getUid, isUserSuperAdmin, collect_user_info + getUid, isUserSuperAdmin, collect_user_info, setUid from invenio.webinterface_handler_wsgi_utils import StringField from invenio.session import get_session +from invenio import web_api_key + ## The following variable is True if the installation make any difference ## between HTTP Vs. HTTPS connections. CFG_HAS_HTTPS_SUPPORT = CFG_SITE_SECURE_URL.startswith("https://") ## The following variable is True if HTTPS is used for *any* URL. CFG_FULL_HTTPS = CFG_SITE_URL.lower().startswith("https://") ## Set this to True in order to log some more information. DEBUG = False # List of URIs for which the 'ln' argument must not be added # automatically CFG_NO_LANG_RECOGNITION_URIS = ['/rss', '/oai2d', '/journal'] RE_SLASHES = re.compile('/+') RE_SPECIAL_URI = re.compile('^/%s/\d+|^/collection/.+' % CFG_SITE_RECORD) _RE_BAD_MSIE = re.compile("MSIE\s+(\d+\.\d+)") def _debug(req, msg): """ Log the message. @param req: the request. @param msg: the message. @type msg: string """ if DEBUG: req.log_error(msg) def _check_result(req, result): """ Check that a page handler actually wrote something, and properly finish the apache request. @param req: the request. @param result: the produced output. @type result: string @return: an apache error code @rtype: int @raise apache.SERVER_RETURN: in case of a HEAD request. 
    @note: this function actually takes care of writing the result
    to the client.
    """
    if result or req.bytes_sent > 0:
        if result is None:
            result = ""
        else:
            result = str(result)

        # unless content_type was manually set, we will attempt
        # to guess it
        if not req.content_type_set_p:
            # make an attempt to guess content-type
            if result[:100].strip()[:6].lower() == '<html>' \
               or result.find('</') > 0:
                req.content_type = 'text/html'
            else:
                req.content_type = 'text/plain'

        if req.header_only:
            if req.status in (apache.HTTP_NOT_FOUND, ):
                raise apache.SERVER_RETURN, req.status
        else:
            req.write(result)
        return apache.OK
    else:
        req.log_error("publisher: %s returned nothing." % `object`)
        return apache.HTTP_INTERNAL_SERVER_ERROR

class TraversalError(Exception):
    """ Exception raised in case of an error in parsing the URL of the
    request. """
    pass

class WebInterfaceDirectory(object):
    """ A directory groups web pages, and can delegate dispatching of
    requests to the actual handler.  This has been heavily borrowed
    from Quixote's dispatching mechanism, with specific adaptations. """

    # Lists the valid URLs contained in this directory.
    _exports = []

    # Set this to True in order to redirect queries over HTTPS
    _force_https = False

    def _translate(self, component):
        """(component : string) -> string | None

        Translate a path component into a Python identifier.  Returning
        None signifies that the component does not exist.
        """
        if component in self._exports:
            if component == '':
                return 'index' # implicit mapping
            else:
                return component
        else:
            # check for an explicit external to internal mapping
            for value in self._exports:
                if isinstance(value, tuple):
                    if value[0] == component:
                        return value[1]
            else:
                return None

    def _lookup(self, component, path):
        """ Override this method if you need to map dynamic URLs.
        It can eat up as much of the remaining path as needed, and
        return the remaining parts, so that the traversal can
        continue.
        """
        return None, path

    def _traverse(self, req, path, do_head=False, guest_p=True):
        """ Locate the handler of an URI by traversing the elements of
        the path."""
        _debug(req, 'traversing %r' % path)

        component, path = path[0], path[1:]
        name = self._translate(component)

        if name is None:
            obj, path = self._lookup(component, path)
        else:
            obj = getattr(self, name)

        if obj is None:
            _debug(req, 'could not resolve %s' % repr((component, path)))
            raise TraversalError()

        # We have found the next segment. If we know that from this
        # point our subpages are over HTTPS, do the switch.
        if (CFG_FULL_HTTPS or CFG_HAS_HTTPS_SUPPORT and (self._force_https or get_session(req).need_https)) and not req.is_https():
            # We need to isolate the part of the URI that is after
            # CFG_SITE_URL, and append that to our CFG_SITE_SECURE_URL.
            original_parts = urlparse.urlparse(req.unparsed_uri)
            plain_prefix_parts = urlparse.urlparse(CFG_SITE_URL)
            secure_prefix_parts = urlparse.urlparse(CFG_SITE_SECURE_URL)

            # Compute the new path
            plain_path = original_parts[2]
            plain_path = secure_prefix_parts[2] + \
                         plain_path[len(plain_prefix_parts[2]):]

            # ...and recompose the complete URL
            final_parts = list(secure_prefix_parts)
            final_parts[2] = plain_path
            final_parts[-3:] = original_parts[-3:]

            target = urlparse.urlunparse(final_parts)
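            # Illustrative walk-through (hypothetical values): with
            # CFG_SITE_URL = 'http://your.site.com' and
            # CFG_SITE_SECURE_URL = 'https://your.site.com', a request
            # for 'http://your.site.com/record/1?ln=en' yields target =
            # 'https://your.site.com/record/1?ln=en', i.e. path, query
            # and fragment survive the scheme switch.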
            ## The following condition used to allow certain URLs to
            ## by-pass the forced SSL redirect.  Since SSL certificates
            ## are deployed on INSPIRE, this is no longer needed.
            ## Will be left here for reference.
            #from invenio.config import CFG_INSPIRE_SITE
            #if not CFG_INSPIRE_SITE or plain_path.startswith('/youraccount/login'):
            redirect_to_url(req, target)

        # Continue the traversal. If there is a path, continue
        # resolving, otherwise call the method as it is our final
        # renderer. We even pass it the parsed form arguments.
        if path:
            if hasattr(obj, '_traverse'):
                return obj._traverse(req, path, do_head, guest_p)
            else:
                raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND

        if do_head:
            req.content_type = "text/html; charset=UTF-8"
            raise apache.SERVER_RETURN, apache.DONE

        form = req.form
        if 'ln' not in form and \
           req.uri not in CFG_NO_LANG_RECOGNITION_URIS:
            ln = get_preferred_user_language(req)
            form.add_field('ln', ln)
        result = _check_result(req, obj(req, form))
        return result

    def __call__(self, req, form):
        """ Maybe resolve the final / of a directory """
        # When this method is called, we either are a directory which
        # has an 'index' method, and we redirect to it, or we don't
        # have such a method, in which case it is a traversal error.
        if "" in self._exports:
            if not form:
                # Fix missing trailing slash as a convenience, unless
                # we are processing a form (in which case it is better
                # to fix the form posting).
                redirect_to_url(req, req.uri + "/", apache.HTTP_MOVED_PERMANENTLY)

        _debug(req, 'directory %r is not callable' % self)
        raise TraversalError()

def create_handler(root):
    """ Return a handler function that will dispatch apache requests
    through the URL layout passed in parameter."""

    def _profiler(req):
        """ This handler wraps the default handler with a profiler.
        Profiling data is written into
        CFG_TMPDIR/invenio-profile-stats-datetime.raw, and is displayed
        at the bottom of the webpage.  To use it, add profile=1 to your
        URL.  To change the sorting algorithm you can provide
        profile=algorithm_name.  You can add more than one profile
        requirement, like ?profile=time&profile=cumulative.  The list
        of available algorithms is displayed at the end of the profile.
        """
        args = {}
        if req.args:
            args = cgi.parse_qs(req.args)
        if 'profile' in args:
            if not isUserSuperAdmin(collect_user_info(req)):
                return _handler(req)
            if 'memory' in args['profile']:
                gc.set_debug(gc.DEBUG_LEAK)
                ret = _handler(req)
                req.write("\n<pre>%s</pre>" % gc.garbage)
                gc.collect()
                req.write("\n<pre>%s</pre>" % gc.garbage)
                gc.set_debug(0)
                return ret
            from cStringIO import StringIO
            try:
                import pstats
            except ImportError:
                ret = _handler(req)
                req.write("<pre>%s</pre>" % "The Python Profiler is not installed!")
                return ret
            import datetime
            date = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
            filename = '%s/invenio-profile-stats-%s.raw' % (CFG_TMPDIR, date)
            existing_sorts = pstats.Stats.sort_arg_dict_default.keys()
            required_sorts = []
            profile_dump = []
            for sort in args['profile']:
                if sort not in existing_sorts:
                    sort = 'cumulative'
                if sort not in required_sorts:
                    required_sorts.append(sort)
            if sys.hexversion < 0x02050000:
                import hotshot
                import hotshot.stats
                pr = hotshot.Profile(filename)
                ret = pr.runcall(_handler, req)
                for sort_type in required_sorts:
                    tmp_out = sys.stdout
                    sys.stdout = StringIO()
                    hotshot.stats.load(filename).strip_dirs().sort_stats(sort_type).print_stats()
                    # pylint: disable=E1103
                    # This is a hack. sys.stdout was replaced by a StringIO.
                    profile_dump.append(sys.stdout.getvalue())
                    # pylint: enable=E1103
                    sys.stdout = tmp_out
            else:
                import cProfile
                pr = cProfile.Profile()
                ret = pr.runcall(_handler, req)
                pr.dump_stats(filename)
                for sort_type in required_sorts:
                    strstream = StringIO()
                    pstats.Stats(filename, stream=strstream).strip_dirs().sort_stats(sort_type).print_stats()
                    profile_dump.append(strstream.getvalue())
            profile_dump = '\n'.join(profile_dump)
            profile_dump += '\nYou can use profile=%s or profile=memory' % existing_sorts
            req.write("\n<pre>%s</pre>" % profile_dump)
            return ret
        elif 'debug' in args and args['debug']:
            #remote_debugger.start(["3"]) # example starting debugger on demand
            if remote_debugger:
                debug_starter = remote_debugger.get_debugger(args['debug'])
                if debug_starter:
                    try:
                        debug_starter()
                    except Exception, msg:
                        # TODO - should register_exception?
                        raise Exception('Cannot start the debugger %s, please read instructions inside remote_debugger module. %s' % (debug_starter.__name__, msg))
                else:
                    raise Exception('Debugging requested, but no debugger registered: "%s"' % args['debug'])
            return _handler(req)
        else:
            return _handler(req)

    def _handler(req):
        """ This handler is invoked by mod_python with the apache request."""
        try:
-            allowed_methods = ("GET", "POST", "HEAD", "OPTIONS")
+            allowed_methods = ("GET", "POST", "HEAD", "OPTIONS", "PUT")
            req.allow_methods(allowed_methods, 1)
            if req.method not in allowed_methods:
                raise apache.SERVER_RETURN, apache.HTTP_METHOD_NOT_ALLOWED

            if req.method == 'OPTIONS':
                ## OPTIONS is used to know which methods are allowed
                req.headers_out['Allow'] = ', '.join(allowed_methods)
                raise apache.SERVER_RETURN, apache.OK

            # Set user agent for fckeditor.py, which needs it here
            os.environ["HTTP_USER_AGENT"] = req.headers_in.get('User-Agent', '')

+            # Check if REST authentication can be performed
+            if req.args:
+                args = cgi.parse_qs(req.args)
+                if 'apikey' in args and req.is_https():
+                    uid = web_api_key.acc_get_uid_from_request(req.uri, req.args)
+                    if uid < 0:
+                        raise apache.SERVER_RETURN, apache.HTTP_UNAUTHORIZED
+                    else:
+                        setUid(req=req, uid=uid)
+
            guest_p = isGuestUser(getUid(req), run_on_slave=False)

            uri = req.uri
            if uri == '/':
                path = ['']
            else:
                ## Let's collapse multiple slashes into a single /
                uri = RE_SLASHES.sub('/', uri)
                path = uri[1:].split('/')

            if CFG_ACCESS_CONTROL_LEVEL_SITE > 1:
                ## If the site is in maintenance mode let's return
                ## 503 to casual crawlers to avoid having the site
                ## indexed
                req.status = 503

            if uri.startswith('/yours') or not guest_p:
                ## Private/personalized request should not be cached
                g = _RE_BAD_MSIE.search(req.headers_in.get('User-Agent', "MSIE 6.0"))
                bad_msie = g and float(g.group(1)) < 9.0
                if bad_msie:
                    req.headers_out['Cache-Control'] = 'private, max-age=0, must-revalidate'
                else:
                    req.headers_out['Cache-Control'] = 'private, no-cache, no-store, max-age=0, must-revalidate'
                    req.headers_out['Pragma'] = 'no-cache'
                    req.headers_out['Vary'] = '*'
            else:
                req.headers_out['Cache-Control'] = 'public, max-age=3600'
                req.headers_out['Vary'] = 'Cookie, ETag, Cache-Control'

            try:
                if req.header_only and not RE_SPECIAL_URI.match(req.uri):
                    return root._traverse(req, path, True, guest_p)
                else:
                    ## bibdocfile has a special treatment for HEAD
                    return root._traverse(req, path, False, guest_p)
            except TraversalError:
                raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
            except apache.SERVER_RETURN:
                ## This is one of mod_python's ways of communicating
                raise
            except IOError, exc:
                if 'Write failed, client closed connection' not in "%s" % exc:
                    ## Workaround for considering as false positive exceptions
                    ## raised by mod_python when the user closes the connection
                    ## or in some other rare and not well identified cases.
                    register_exception(req=req, alert_admin=True)
                raise
            except Exception:
                # send the error message, much more convenient than log hunting
                if remote_debugger:
                    args = {}
                    if req.args:
                        args = cgi.parse_qs(req.args)
                        if 'debug' in args:
                            remote_debugger.error_msg(args['debug'])
                register_exception(req=req, alert_admin=True)
                raise

            # Serve an error by default.
            raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND
        finally:
            if hasattr(req, '_session'):
                ## The session handler saves for caching a request_wrapper
                ## in req.
                ## This saves req as an attribute, creating a circular
                ## reference.
                ## Since we have reached the end of the request handler
                ## we can safely drop the request_wrapper so as to avoid
                ## memory leaks.
                delattr(req, '_session')
            if hasattr(req, '_user_info'):
                ## For the same reason we can delete the user_info.
                delattr(req, '_user_info')

        ## as suggested in
        ##
        del gc.garbage[:]

    return _profiler

def wash_urlargd(form, content):
    """
    Wash the complete form based on the specification in
    content.  Content is a dictionary containing the field names as a
    key, and a tuple (type, default) as value.

    'type' can be list, str,
    invenio.webinterface_handler_wsgi_utils.StringField, int, tuple, or
    invenio.webinterface_handler_wsgi_utils.Field (for file uploads).

    The specification automatically includes the 'ln' field, which is
    common to all queries.

    Arguments that are not defined in 'content' are discarded.

    Note that in case {list,tuple} were asked for, we assume that
    {list,tuple} of strings is to be returned.  Therefore beware when
    you want to use wash_urlargd() for multiple file upload forms.

    @Return: argd dictionary that can be used for passing function
    parameters by keywords.
    """
    result = {}

    content['ln'] = (str, '')

    for k, (dst_type, default) in content.items():
        try:
            value = form[k]
        except KeyError:
            result[k] = default
            continue

        src_type = type(value)

        # First, handle the case where we want all the results. In
        # this case, we need to ensure all the elements are strings,
        # and not Field instances.
        if src_type in (list, tuple):
            if dst_type is list:
                result[k] = [str(x) for x in value]
                continue

            if dst_type is tuple:
                result[k] = tuple([str(x) for x in value])
                continue

            # in all the other cases, we are only interested in the
            # first value.
            value = value[0]

        # Maybe we already have what is expected? Then don't change
        # anything.
        if isinstance(value, dst_type):
            if isinstance(value, StringField):
                result[k] = str(value)
            else:
                result[k] = value
            continue

        # Since we got here, 'value' is sure to be a single symbol,
        # not a list kind of structure anymore.
        if dst_type in (str, int):
            try:
                result[k] = dst_type(value)
            except:
                result[k] = default
        elif dst_type is tuple:
            result[k] = (str(value), )
        elif dst_type is list:
            result[k] = [str(value)]
        else:
            raise ValueError('cannot cast form value %s of type %r into type %r' % (value, src_type, dst_type))

    result['ln'] = wash_language(result['ln'])

    return result
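
## Illustrative sketch (hypothetical field names): a page handler washing
## its query arguments through a (type, default) specification.
def _example_wash_search_args(form):
    """Return washed arguments for a hypothetical search handler."""
    argd = wash_urlargd(form, {'recid': (int, -1),
                               'of': (str, 'hx'),
                               'tags': (list, [])})
    # besides the declared fields, argd carries the automatically added
    # and washed 'ln' key:
    return argd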
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
mod_python->WSGI Framework utilities

This code has been taken from mod_python original source code and
rearranged here to ease the migration from mod_python to WSGI.

The code taken from mod_python is under the following License.
"""

# Copyright 2004 Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License.  You
# may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the License for the specific language governing
# permissions and limitations under the License.
#
# Originally developed by Gregory Trubetskoy.
#
# $Id: apache.py 468216 2006-10-27 00:54:12Z grahamd $

try:
    import threading
except:
    import dummy_threading as threading
from wsgiref.headers import Headers
import time
import re
import os
import cgi
import cStringIO
import tempfile
from types import TypeType, ClassType, BuiltinFunctionType, MethodType, ListType

from invenio.config import CFG_TMPDIR, CFG_TMPSHAREDDIR
from invenio.webinterface_handler_config import \
    SERVER_RETURN, \
    HTTP_LENGTH_REQUIRED, \
    HTTP_BAD_REQUEST, \
    InvenioWebInterfaceWSGIContentLenghtError, \
    InvenioWebInterfaceWSGIContentTypeError, \
    InvenioWebInterfaceWSGIContentMD5Error

# Cache for values of PythonPath that have been seen already.
_path_cache = {}
_path_cache_lock = threading.Lock()

class table(Headers):
    add = Headers.add_header
    iteritems = Headers.items
    def __getitem__(self, name):
        ret = Headers.__getitem__(self, name)
        if ret is None:
            return ''
        else:
            return str(ret)

## Some functions made public
exists_config_define = lambda dummy: True

## Some constants

class metaCookie(type):
    def __new__(cls, clsname, bases, clsdict):
        _valid_attr = (
            "version", "path", "domain", "secure",
            "comment", "expires", "max_age",
            # RFC 2965
            "commentURL", "discard", "port",
            # Microsoft Extension
            "httponly" )

        # _valid_attr + property values
        # (note __slots__ is a new Python feature, it
        # prevents any other attribute from being set)
        __slots__ = _valid_attr + ("name", "value", "_value", "_expires", "__data__")

        clsdict["_valid_attr"] = _valid_attr
        clsdict["__slots__"] = __slots__

        def set_expires(self, value):

            if type(value) == type(""):
                # if it's a string, it should be a
                # valid format as per Netscape spec
                try:
                    t = time.strptime(value, "%a, %d-%b-%Y %H:%M:%S GMT")
                except ValueError:
                    raise ValueError, "Invalid expires time: %s" % value
                t = time.mktime(t)
            else:
                # otherwise assume it's a number
                # representing time as from time.time()
                t = value
                value = time.strftime("%a, %d-%b-%Y %H:%M:%S GMT", time.gmtime(t))

            self._expires = "%s" % value

        def get_expires(self):
            return self._expires

        clsdict["expires"] = property(fget=get_expires, fset=set_expires)

        return type.__new__(cls, clsname, bases, clsdict)

class Cookie(object):
    """
    This class implements the basic Cookie functionality. Note that
    unlike the Python Standard Library Cookie class, this class represents
    a single cookie (not a list of Morsels).
""" __metaclass__ = metaCookie DOWNGRADE = 0 IGNORE = 1 EXCEPTION = 3 def parse(Class, str, **kw): """ Parse a Cookie or Set-Cookie header value, and return a dict of Cookies. Note: the string should NOT include the header name, only the value. """ dict = _parse_cookie(str, Class, **kw) return dict parse = classmethod(parse) def __init__(self, name, value, **kw): """ This constructor takes at least a name and value as the arguments, as well as optionally any of allowed cookie attributes as defined in the existing cookie standards. """ self.name, self.value = name, value for k in kw: setattr(self, k.lower(), kw[k]) # subclasses can use this for internal stuff self.__data__ = {} def __str__(self): """ Provides the string representation of the Cookie suitable for sending to the browser. Note that the actual header name will not be part of the string. This method makes no attempt to automatically double-quote strings that contain special characters, even though the RFC's dictate this. This is because doing so seems to confuse most browsers out there. """ result = ["%s=%s" % (self.name, self.value)] # pylint: disable=E1101 # The attribute _valid_attr is provided by the metaclass 'metaCookie'. for name in self._valid_attr: if hasattr(self, name): if name in ("secure", "discard", "httponly"): result.append(name) else: result.append("%s=%s" % (name, getattr(self, name))) # pylint: enable=E1101 return "; ".join(result) def __repr__(self): return '<%s: %s>' % (self.__class__.__name__, str(self)) # This is a simplified and in some places corrected # (at least I think it is) pattern from standard lib Cookie.py _cookiePattern = re.compile( r"(?x)" # Verbose pattern r"[,\ ]*" # space/comma (RFC2616 4.2) before attr-val is eaten r"(?P" # Start of group 'key' r"[^;\ =]+" # anything but ';', ' ' or '=' r")" # End of group 'key' r"\ *(=\ *)?" # a space, then may be "=", more space r"(?P" # Start of group 'val' r'"(?:[^\\"]|\\.)*"' # a doublequoted string r"|" # or r"[^;]*" # any word or empty string r")" # End of group 'val' r"\s*;?" # probably ending in a semi-colon ) def _parse_cookie(str, Class, names=None): # XXX problem is we should allow duplicate # strings result = {} matchIter = _cookiePattern.finditer(str) for match in matchIter: key, val = match.group("key"), match.group("val") # We just ditch the cookies names which start with a dollar sign since # those are in fact RFC2965 cookies attributes. See bug [#MODPYTHON-3]. if key[0] != '$' and names is None or key in names: result[key] = Class(key, val) return result def add_cookies(req, cookies): """ Sets one or more cookie in outgoing headers and adds a cache directive so that caches don't cache the cookie. """ if not req.headers_out.has_key("Set-Cookie"): req.headers_out.add("Cache-Control", 'no-cache="set-cookie"') for cookie in cookies: req.headers_out.add("Set-Cookie", str(cookie)) def get_cookies(req, Class=Cookie, **kw): """ A shorthand for retrieveing and parsing cookies given a Cookie class. The class must be one of the classes from this module. """ if not req.headers_in.has_key("cookie"): return {} cookies = req.headers_in["cookie"] if type(cookies) == type([]): cookies = '; '.join(cookies) return Class.parse(cookies, **kw) def get_cookie(req, name, Class=Cookie, **kw): cookies = get_cookies(req, Class, names=[name], **kw) if cookies.has_key(name): return cookies[name] parse_qs = cgi.parse_qs parse_qsl = cgi.parse_qsl # Maximum line length for reading. (64KB) # Fixes memory error when upload large files such as 700+MB ISOs. 
readBlockSize = 65368 """ The classes below are a (almost) a drop-in replacement for the standard cgi.py FieldStorage class. They should have pretty much the same functionality. These classes differ in that unlike cgi.FieldStorage, they are not recursive. The class FieldStorage contains a list of instances of Field class. Field class is incapable of storing anything in it. These objects should be considerably faster than the ones in cgi.py because they do not expect CGI environment, and are optimized specifically for Apache and mod_python. """ class Field: def __init__(self, name, *args, **kwargs): self.name = name # Some third party packages such as Trac create # instances of the Field object and insert it # directly into the list of form fields. To # maintain backward compatibility check for # where more than just a field name is supplied # and invoke an additional initialisation step # to process the arguments. Ideally, third party # code should use the add_field() method of the # form, but if they need to maintain backward # compatibility with older versions of mod_python # they will not have a choice but to use old # way of doing things and thus we need this code # for the forseeable future to cope with that. if args or kwargs: self.__bc_init__(*args, **kwargs) def __bc_init__(self, file, ctype, type_options, disp, disp_options, headers = {}): self.file = file self.type = ctype self.type_options = type_options self.disposition = disp self.disposition_options = disp_options if disp_options.has_key("filename"): self.filename = disp_options["filename"] else: self.filename = None self.headers = headers def __repr__(self): """Return printable representation.""" return "Field(%s, %s)" % (`self.name`, `self.value`) def __getattr__(self, name): if name != 'value': raise AttributeError, name if self.file: self.file.seek(0) value = self.file.read() self.file.seek(0) else: value = None return value def __del__(self): self.file.close() class StringField(str): """ This class is basically a string with added attributes for compatibility with std lib cgi.py. Basically, this works the opposite of Field, as it stores its data in a string, but creates a file on demand. Field creates a value on demand and stores data in a file. """ filename = None headers = {} ctype = "text/plain" type_options = {} disposition = None disp_options = None def __new__(cls, value): '''Create StringField instance. 
You'll have to set name yourself.''' obj = str.__new__(cls, value) obj.value = value return obj def __str__(self): return str.__str__(self) def __getattr__(self, name): if name != 'file': raise AttributeError, name self.file = cStringIO.StringIO(self.value) return self.file def __repr__(self): """Return printable representation (to pass unit tests).""" return "Field(%s, %s)" % (`self.name`, `self.value`) class FieldList(list): def __init__(self): self.__table = None list.__init__(self) def table(self): if self.__table is None: self.__table = {} for item in self: if item.name in self.__table: self.__table[item.name].append(item) else: self.__table[item.name] = [item] return self.__table def __delitem__(self, *args): self.__table = None return list.__delitem__(self, *args) def __delslice__(self, *args): self.__table = None return list.__delslice__(self, *args) def __iadd__(self, *args): self.__table = None return list.__iadd__(self, *args) def __imul__(self, *args): self.__table = None return list.__imul__(self, *args) def __setitem__(self, *args): self.__table = None return list.__setitem__(self, *args) def __setslice__(self, *args): self.__table = None return list.__setslice__(self, *args) def append(self, *args): self.__table = None return list.append(self, *args) def extend(self, *args): self.__table = None return list.extend(self, *args) def insert(self, *args): self.__table = None return list.insert(self, *args) def pop(self, *args): self.__table = None return list.pop(self, *args) def remove(self, *args): self.__table = None return list.remove(self, *args) class FieldStorage: def __init__(self, req, keep_blank_values=0, strict_parsing=0, file_callback=None, field_callback=None, to_tmp_shared=False): # # Whenever readline is called ALWAYS use the max size EVEN when # not expecting a long line. - this helps protect against # malformed content from exhausting memory. 
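# Illustration, not part of the patch: the FieldList above memoises its
# name -> [fields] mapping and invalidates the cache on every mutation.
fl = FieldList()
f = StringField('green')
f.name = 'color'
fl.append(f)
fl.table()     # {'color': [f]} -- built lazily and cached
fl.append(f)   # any mutator resets the cached table ...
fl.table()     # ... so it is rebuilt here: {'color': [f, f]}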
# self.list = FieldList() - self.wsgi_input_consumed = True + self.wsgi_input_consumed = False # always process GET-style parameters if req.args: pairs = parse_qsl(req.args, keep_blank_values) for pair in pairs: self.add_field(pair[0], pair[1]) if req.method != "POST": return try: clen = int(req.headers_in["content-length"]) except (KeyError, ValueError): # absent content-length is not acceptable raise SERVER_RETURN, HTTP_LENGTH_REQUIRED self.clen = clen self.count = 0 if not req.headers_in.has_key("content-type"): ctype = "application/x-www-form-urlencoded" else: ctype = req.headers_in["content-type"] if ctype.startswith("application/x-www-form-urlencoded"): pairs = parse_qsl(req.read(clen), keep_blank_values) + self.wsgi_input_consumed = True for pair in pairs: self.add_field(pair[0], pair[1]) return - - - if not ctype.startswith("multipart/"): + elif not ctype.startswith("multipart/"): # we don't understand this content-type - self.wsgi_input_consumed = False return + self.wsgi_input_consumed = True # figure out boundary try: i = ctype.lower().rindex("boundary=") boundary = ctype[i+9:] if len(boundary) >= 2 and boundary[0] == boundary[-1] == '"': boundary = boundary[1:-1] boundary = re.compile("--" + re.escape(boundary) + "(--)?\r?\n") except ValueError: raise SERVER_RETURN, HTTP_BAD_REQUEST # read until boundary self.read_to_boundary(req, boundary, None) end_of_stream = False while not end_of_stream and not self.eof(): # jjj JIM BEGIN WHILE ## parse headers ctype, type_options = "text/plain", {} disp, disp_options = None, {} headers = table([]) line = req.readline(readBlockSize) self.count += len(line) if self.eof(): end_of_stream = True match = boundary.match(line) if (not line) or match: # we stop if we reached the end of the stream or a stop # boundary (which means '--' after the boundary) we # continue to the next part if we reached a simple # boundary in either case this would mean the entity is # malformed, but we're tolerating it anyway. end_of_stream = (not line) or (match.group(1) is not None) continue skip_this_part = False while line not in ('\r','\r\n'): nextline = req.readline(readBlockSize) self.count += len(nextline) if self.eof(): end_of_stream = True while nextline and nextline[0] in [ ' ', '\t']: line = line + nextline nextline = req.readline(readBlockSize) self.count += len(nextline) if self.eof(): end_of_stream = True # we read the headers until we reach an empty line # NOTE : a single \n would mean the entity is malformed, but # we're tolerating it anyway h, v = line.split(":", 1) headers.add(h, v) h = h.lower() if h == "content-disposition": disp, disp_options = parse_header(v) elif h == "content-type": ctype, type_options = parse_header(v) # # NOTE: FIX up binary rubbish sent as content type # from Microsoft IE 6.0 when sending a file which # does not have a suffix. # if ctype.find('/') == -1: ctype = 'application/octet-stream' line = nextline match = boundary.match(line) if (not line) or match: # we stop if we reached the end of the stream or a # stop boundary (which means '--' after the # boundary) we continue to the next part if we # reached a simple boundary in either case this # would mean the entity is malformed, but we're # tolerating it anyway. skip_this_part = True end_of_stream = (not line) or (match.group(1) is not None) break if skip_this_part: continue if disp_options.has_key("name"): name = disp_options["name"] else: name = None # create a file object # is this a file? 
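# Illustration, not part of the patch: what the boundary pattern compiled
# above matches, for an invented multipart boundary token 'XyZ'.
pat = re.compile("--" + re.escape("XyZ") + "(--)?\r?\n")
pat.match("--XyZ\r\n").group(1)      # None -> part separator, keep reading
pat.match("--XyZ--\r\n").group(1)    # '--' -> final boundary, end of entity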
if disp_options.has_key("filename"): if file_callback and callable(file_callback): file = file_callback(disp_options["filename"]) else: if to_tmp_shared: file = tempfile.NamedTemporaryFile(dir=CFG_TMPSHAREDDIR) else: file = tempfile.NamedTemporaryFile(dir=CFG_TMPDIR) else: if field_callback and callable(field_callback): file = field_callback() else: file = cStringIO.StringIO() # read it in self.read_to_boundary(req, boundary, file) if self.eof(): end_of_stream = True file.seek(0) # make a Field if disp_options.has_key("filename"): field = Field(name) field.filename = disp_options["filename"] else: field = StringField(file.read()) field.name = name field.file = file field.type = ctype field.type_options = type_options field.disposition = disp field.disposition_options = disp_options field.headers = headers self.list.append(field) def add_field(self, key, value): """Insert a field as key/value pair""" item = StringField(value) item.name = key self.list.append(item) def __setitem__(self, key, value): table = self.list.table() if table.has_key(key): items = table[key] for item in items: self.list.remove(item) item = StringField(value) item.name = key self.list.append(item) def read_to_boundary(self, req, boundary, file): previous_delimiter = None while not self.eof(): line = req.readline(readBlockSize) self.count += len(line) if not line: # end of stream if file is not None and previous_delimiter is not None: file.write(previous_delimiter) return True match = boundary.match(line) if match: # the line is the boundary, so we bail out # if the two last chars are '--' it is the end of the entity return match.group(1) is not None if line[-2:] == '\r\n': # the line ends with a \r\n, which COULD be part # of the next boundary. We write the previous line delimiter # then we write the line without \r\n and save it for the next # iteration if it was not part of the boundary if file is not None: if previous_delimiter is not None: file.write(previous_delimiter) file.write(line[:-2]) previous_delimiter = '\r\n' elif line[-1:] == '\r': # the line ends with \r, which is only possible if # readBlockSize bytes have been read. 
In that case the # \r COULD be part of the next boundary, so we save it # for the next iteration assert len(line) == readBlockSize if file is not None: if previous_delimiter is not None: file.write(previous_delimiter) file.write(line[:-1]) previous_delimiter = '\r' elif line == '\n' and previous_delimiter == '\r': # the line us a single \n and we were in the middle of a \r\n, # so we complete the delimiter previous_delimiter = '\r\n' else: if file is not None: if previous_delimiter is not None: file.write(previous_delimiter) file.write(line) previous_delimiter = None def eof(self): return self.clen <= self.count def __getitem__(self, key): """Dictionary style indexing.""" found = self.list.table()[key] if len(found) == 1: return found[0] else: return found def get(self, key, default): try: return self.__getitem__(key) except (TypeError, KeyError): return default def keys(self): """Dictionary style keys() method.""" return self.list.table().keys() def __iter__(self): return iter(self.keys()) def __repr__(self): return repr(self.list.table()) def has_key(self, key): """Dictionary style has_key() method.""" return (key in self.list.table()) __contains__ = has_key def __len__(self): """Dictionary style len(x) support.""" return len(self.list.table()) def getfirst(self, key, default=None): """ return the first value received """ try: return self.list.table()[key][0] except KeyError: return default def getlist(self, key): """ return a list of received values """ try: return self.list.table()[key] except KeyError: return [] def items(self): """Dictionary-style items(), except that items are returned in the same order as they were supplied in the form.""" return [(item.name, item) for item in self.list] def __delitem__(self, key): table = self.list.table() values = table[key] for value in values: self.list.remove(value) def clear(self): self.list = FieldList() def parse_header(line): """Parse a Content-type like header. Return the main content-type and a dictionary of options. """ plist = map(lambda a: a.strip(), line.split(';')) key = plist[0].lower() del plist[0] pdict = {} for p in plist: i = p.find('=') if i >= 0: name = p[:i].strip().lower() value = p[i+1:].strip() if len(value) >= 2 and value[0] == value[-1] == '"': value = value[1:-1] pdict[name] = value return key, pdict def apply_fs_data(object, fs, **args): """ Apply FieldStorage data to an object - the object must be callable. Examine the args, and match then with fs data, then call the object, return the result. """ # we need to weed out unexpected keyword arguments # and for that we need to get a list of them. 
There
    # are a few options for callable objects here:

    fc = None
    expected = []
    if hasattr(object, "func_code"):
        # function
        fc = object.func_code
        expected = fc.co_varnames[0:fc.co_argcount]
    elif hasattr(object, 'im_func'):
        # method
        fc = object.im_func.func_code
        expected = fc.co_varnames[1:fc.co_argcount]
    elif type(object) in (TypeType, ClassType):
        # class
        fc = object.__init__.im_func.func_code
        expected = fc.co_varnames[1:fc.co_argcount]
    elif type(object) is BuiltinFunctionType:
        # builtin
        fc = None
        expected = []
    elif hasattr(object, '__call__'):
        # callable object
        if type(object.__call__) is MethodType:
            fc = object.__call__.im_func.func_code
            expected = fc.co_varnames[1:fc.co_argcount]
        else:
            # abuse of objects to create hierarchy
            return apply_fs_data(object.__call__, fs, **args)

    # add form data to args
    for field in fs.list:
        if field.filename:
            val = field
        else:
            val = field.value
        args.setdefault(field.name, []).append(val)

    # replace lists with single values
    for arg in args:
        if ((type(args[arg]) is ListType) and (len(args[arg]) == 1)):
            args[arg] = args[arg][0]

    # remove unexpected args unless co_flags & 0x08,
    # meaning function accepts **kw syntax
    if fc is None:
        args = {}
    elif not (fc.co_flags & 0x08):
        for name in args.keys():
            if name not in expected:
                del args[name]

    return object(**args)

RE_CDISPOSITION_FILENAME = re.compile(r'filename=(?P<filename>[\w\.]*)')

def handle_file_post(req, allowed_mimetypes=None):
    """
    Handle the POST of a file.
-    @return: the a tuple with th full path to the file saved on disk,
+    @return: a tuple with the full path to the file saved on disk,
        and its mimetype as provided by the request.
    @rtype: (string, string)
    """
    from invenio.bibdocfile import decompose_file, md5

    ## We retrieve the length
    clen = req.headers_in["Content-Length"]
    if clen is None:
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header is missing")
    try:
        clen = int(clen)
        assert (clen > 1)
    except (ValueError, AssertionError):
        raise InvenioWebInterfaceWSGIContentLenghtError("Content-Length header should contain a positive integer")
    ## Let's take the content type
    ctype = req.headers_in["Content-Type"]
    if allowed_mimetypes and ctype not in allowed_mimetypes:
        raise InvenioWebInterfaceWSGIContentTypeError("Content-Type not in allowed list of content types: %s" % allowed_mimetypes)
    ## Let's optionally accept a suggested filename
    suffix = prefix = ''
    g = RE_CDISPOSITION_FILENAME.search(req.headers_in.get("Content-Disposition", ""))
    if g:
        dummy, prefix, suffix = decompose_file(g.group("filename"))
    ## Let's optionally accept an MD5 hash (and use it later for comparison)
    cmd5 = req.headers_in["Content-MD5"]
    if cmd5:
        the_md5 = md5()

    ## Ok. We can initialize the file
    fd, path = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR)
    the_file = os.fdopen(fd, 'w')
    ## Let's read the file
    while True:
        chunk = req.read(max(10240, clen))
        if len(chunk) < clen:
            ## We expected to read at least clen (which is different from 0)
            ## but chunk was shorter! Gosh! Error! Panic!
            the_file.close()
            os.close(fd)
            os.remove(path)
            raise InvenioWebInterfaceWSGIContentLenghtError("File shorter than what was specified in Content-Length")
        if cmd5:
            ## MD5 was in the header, let's compute it
            the_md5.update(chunk)
        ## And let's definitively write the content to disk :-)
        the_file.write(chunk)
        clen -= len(chunk)
        if clen == 0:
            ## That's it. Everything was read.
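# Illustration, not part of the patch: the request headers a client would
# send so that handle_file_post() above can check length, type and
# checksum. The path and mimetype are invented; hashlib stands in for the
# md5 helper the server imports from invenio.bibdocfile.
import hashlib
body = open('/tmp/article.pdf', 'rb').read()
headers = {
    'Content-Type': 'application/pdf',                 # screened via allowed_mimetypes
    'Content-Length': str(len(body)),                  # must be a positive integer
    'Content-MD5': hashlib.md5(body).hexdigest(),      # compared after the read loop
    'Content-Disposition': 'filename=article.pdf',     # optional name hint
}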
            break

    ## Let's check the MD5
    if cmd5 and the_md5.hexdigest().lower() != cmd5.strip().lower():
        the_file.close()
        os.close(fd)
        os.remove(path)
        raise InvenioWebInterfaceWSGIContentMD5Error("MD5 checksum does not match")

    ## Let's clean everything up
    the_file.close()
    return (path, ctype)
diff --git a/modules/websubmit/lib/bibdocfile.py b/modules/websubmit/lib/bibdocfile.py
index c266974fa..73f8bb8a8 100644
--- a/modules/websubmit/lib/bibdocfile.py
+++ b/modules/websubmit/lib/bibdocfile.py
@@ -1,4007 +1,4013 @@
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

"""
This module implements the low-level API for dealing with fulltext files.
    - All the files associated with a I{record} (identified by a I{recid})
      can be managed via an instance of the C{BibRecDocs} class.
    - A C{BibRecDocs} is a wrapper of the list of I{documents} attached to
      the record.
    - Each document is represented by an instance of the C{BibDoc} class.
    - A document is identified by a C{docid} and name (C{docname}). The
      docname must be unique within the record. A document is the set of
      all the formats and revisions of a piece of information.
    - A document has a type called C{doctype} and can have a restriction.
    - Each physical file, i.e. the concretization of a document into a
      particular I{version} and I{format}, is represented by an instance
      of the C{BibDocFile} class.
    - The format is in fact the extension of the physical file.
    - A comment, a description, and other information can be associated
      with a BibDocFile.
    - A C{bibdoc} is a synonym for a document, while a C{bibdocfile} is a
      synonym for a physical file.

@group Main classes: BibRecDocs,BibDoc,BibDocFile
@group Other classes: BibDocMoreInfo,Md5Folder,InvenioWebSubmitFileError
@group Main functions: decompose_file,stream_file,bibdocfile_*,download_url
@group Configuration Variables: CFG_*
"""

__revision__ = "$Id$"

import os
import re
import shutil
import filecmp
import time
import random
import socket
import urllib2
import urllib
import tempfile
import cPickle
import base64
import binascii
import cgi
import sys

from warnings import warn
if sys.hexversion < 0x2060000:
    from md5 import md5
else:
    from hashlib import md5

try:
    import magic
    if not hasattr(magic, "open"):
        raise ImportError
    CFG_HAS_MAGIC = True
except ImportError:
    CFG_HAS_MAGIC = False

## The following flag controls whether HTTP range requests are supported or
## not when serving static files via Python. This is disabled by default as
## it currently breaks support for opening PDF files on Windows platforms
## using the Acrobat Reader browser plugin.
CFG_ENABLE_HTTP_RANGE_REQUESTS = False

from datetime import datetime
from mimetypes import MimeTypes
from thread import get_ident

from invenio import webinterface_handler_config as apache

## Let's set a reasonable timeout for URL requests (e.g. FFT)
socket.setdefaulttimeout(40)

if sys.hexversion < 0x2040000:
    # pylint: disable=W0622
    from sets import Set as set
    # pylint: enable=W0622

from invenio.shellutils import escape_shell_arg
from invenio.dbquery import run_sql, DatabaseError, blob_to_string
from invenio.errorlib import register_exception
from invenio.bibrecord import record_get_field_instances, \
    field_get_subfield_values, field_get_subfield_instances, \
    encode_for_xml
-from invenio.urlutils import create_url
+from invenio.urlutils import create_url, make_user_agent_string
from invenio.textutils import nice_size
from invenio.access_control_engine import acc_authorize_action
from invenio.webuser import collect_user_info
from invenio.access_control_admin import acc_is_user_in_role, acc_get_role_id
from invenio.access_control_firerole import compile_role_definition, acc_firerole_check_user
from invenio.access_control_config import SUPERADMINROLE, CFG_WEBACCESS_WARNING_MSGS
from invenio.config import CFG_SITE_LANG, CFG_SITE_URL, \
    CFG_WEBDIR, CFG_WEBSUBMIT_FILEDIR,\
    CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS, \
    CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT, CFG_SITE_SECURE_URL, \
    CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS, \
    CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_PATH_MD5SUM, \
    CFG_WEBSUBMIT_STORAGEDIR, \
    CFG_BIBDOCFILE_USE_XSENDFILE, \
    CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY, \
    CFG_SITE_RECORD, \
    CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS, \
    CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE
from invenio.websubmit_config import CFG_WEBSUBMIT_ICON_SUBFORMAT_RE, \
    CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT
import invenio.template

websubmit_templates = invenio.template.load('websubmit')
websearch_templates = invenio.template.load('websearch')

#: block size when performing I/O.
CFG_BIBDOCFILE_BLOCK_SIZE = 1024 * 8

#: threshold used to decide when to use the Python MD5 or the CLI MD5 algorithm.
CFG_BIBDOCFILE_MD5_THRESHOLD = 256 * 1024

#: chunks loaded by the Python MD5 algorithm.
CFG_BIBDOCFILE_MD5_BUFFER = 1024 * 1024

#: whether to normalize e.g. ".JPEG" and ".jpg" into .jpeg.
CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION = False

#: flags that can be associated with files.
CFG_BIBDOCFILE_AVAILABLE_FLAGS = (
    'PDF/A',
    'STAMPED',
    'PDFOPT',
    'HIDDEN',
    'CONVERTED',
    'PERFORM_HIDE_PREVIOUS',
    'OCRED'
)

#: constant used in FFT correct mode, with the obvious meaning.
KEEP_OLD_VALUE = 'KEEP-OLD-VALUE'

_CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS = [(re.compile(_regex), _headers) for _regex, _headers in CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS]

_mimes = MimeTypes(strict=False)
_mimes.suffix_map.update({'.tbz2' : '.tar.bz2'})
_mimes.encodings_map.update({'.bz2' : 'bzip2'})

_magic_cookies = {}

def _get_magic_cookies():
    """
    @return: a mapping of magic flags to magic cookie objects.
    @rtype: dict of (MAGIC_NONE, MAGIC_COMPRESS, MAGIC_MIME,
        MAGIC_COMPRESS + MAGIC_MIME) to cookies
    @note: ... not real magic. Just see: man file(1)
    """
    thread_id = get_ident()
    if thread_id not in _magic_cookies:
        _magic_cookies[thread_id] = {
            magic.MAGIC_NONE : magic.open(magic.MAGIC_NONE),
            magic.MAGIC_COMPRESS : magic.open(magic.MAGIC_COMPRESS),
            magic.MAGIC_MIME : magic.open(magic.MAGIC_MIME),
            magic.MAGIC_COMPRESS + magic.MAGIC_MIME : magic.open(magic.MAGIC_COMPRESS + magic.MAGIC_MIME)
        }
        for key in _magic_cookies[thread_id].keys():
            _magic_cookies[thread_id][key].load()
    return _magic_cookies[thread_id]

def _generate_extensions():
    """
    Generate the regular expression to match all the known extensions.

    @return: the regular expression.
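# Illustration, not part of the patch: how the MD5 buffering constant above
# is meant to be used -- hashing big files in CFG_BIBDOCFILE_MD5_BUFFER
# chunks rather than reading them whole. A sketch only, not the module's
# own checksum helper.
def md5_of_file(path, buffer_size=CFG_BIBDOCFILE_MD5_BUFFER):
    hasher = md5()
    stream = open(path, 'rb')
    try:
        chunk = stream.read(buffer_size)
        while chunk:
            hasher.update(chunk)
            chunk = stream.read(buffer_size)
    finally:
        stream.close()
    return hasher.hexdigest()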
    @rtype: regular expression object
    """
    _tmp_extensions = _mimes.encodings_map.keys() + \
                _mimes.suffix_map.keys() + \
                _mimes.types_map[1].keys() + \
                CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS
    extensions = []
    for ext in _tmp_extensions:
        if ext.startswith('.'):
            extensions.append(ext)
        else:
            extensions.append('.' + ext)
    extensions.sort()
    extensions.reverse()
    extensions = set([ext.lower() for ext in extensions])
    extensions = '\\' + '$|\\'.join(extensions) + '$'
    extensions = extensions.replace('+', '\\+')
    return re.compile(extensions, re.I)

#: Regular expression to recognize extensions.
_extensions = _generate_extensions()

class InvenioWebSubmitFileError(Exception):
    """
    Exception raised in case of errors related to fulltext files.
    """
    pass

class InvenioBibdocfileUnauthorizedURL(Exception):
    """
    Exception raised when one tries to download an unauthorized external URL.
    """
    pass

def file_strip_ext(afile, skip_version=False, only_known_extensions=False, allow_subformat=True):
    """
    Strip the extension from a filename in the best way.

    >>> file_strip_ext("foo.tar.gz")
    'foo'
    >>> file_strip_ext("foo.buz.gz")
    'foo.buz'
    >>> file_strip_ext("foo.buz")
    'foo'
    >>> file_strip_ext("foo.buz", only_known_extensions=True)
    'foo.buz'
    >>> file_strip_ext("foo.buz;1", skip_version=False,
    ... only_known_extensions=True)
    'foo.buz;1'
    >>> file_strip_ext("foo.gif;icon")
    'foo'
    >>> file_strip_ext("foo.gif:icon", allow_subformat=False)
    'foo.gif:icon'

    @param afile: the path/name of a file.
    @type afile: string
    @param skip_version: whether to skip a trailing ";version".
    @type skip_version: bool
    @param only_known_extensions: whether to strip out only known extensions
        or to consider as extension anything that follows a dot.
    @type only_known_extensions: bool
    @param allow_subformat: whether to consider also subformats as part of
        the extension.
    @type allow_subformat: bool
    @return: the name/path without the extension (and version).
    @rtype: string
    """
    if skip_version or allow_subformat:
        afile = afile.split(';')[0]
    nextfile = _extensions.sub('', afile)
    if nextfile == afile and not only_known_extensions:
        nextfile = os.path.splitext(afile)[0]
    while nextfile != afile:
        afile = nextfile
        nextfile = _extensions.sub('', afile)
    return nextfile

def normalize_format(format, allow_subformat=True):
    """
    Normalize the format, e.g. by adding a dot in front.

    @param format: the format/extension to be normalized.
    @type format: string
    @param allow_subformat: whether to consider also subformats as part of
        the extension.
    @type allow_subformat: bool
    @return: the normalized format.
    @rtype: string
    """
    if allow_subformat:
        subformat = format[format.rfind(';'):]
        format = format[:format.rfind(';')]
    else:
        subformat = ''
    if format and format[0] != '.':
        format = '.' + format
    if CFG_BIBDOCFILE_STRONG_FORMAT_NORMALIZATION:
        if format not in ('.Z', '.H', '.C', '.CC'):
            format = format.lower()
        format = {
            '.jpg' : '.jpeg',
            '.htm' : '.html',
            '.tif' : '.tiff'
        }.get(format, format)
    return format + subformat

def guess_format_from_url(url):
    """
    Given a URL, try to guess its extension. Different methods will be
    used, including an HTTP HEAD query, downloading the resource and
    using the mime library.

    @param url: the URL for which the extension should be guessed.
    @type url: string
    @return: the recognized extension or empty string if it's impossible to
        recognize it.
@rtype: string """ ## Let's try to guess the extension by considering the URL as a filename ext = decompose_file(url, skip_version=True, only_known_extensions=True)[2] if ext.startswith('.'): return ext if is_url_a_local_file(url) and CFG_HAS_MAGIC: ## if the URL corresponds to a local file, let's try to use ## the Python magic library to guess it try: magic_cookie = _get_magic_cookies()[magic.MAGIC_MIME] mimetype = magic_cookie.file(url) ext = _mimes.guess_extension(mimetype) if ext: return normalize_format(ext) except Exception: pass else: ## Since the URL is remote, let's try to perform a HEAD request ## and see the corresponding headers try: response = open_url(url, head_request=True) except (InvenioBibdocfileUnauthorizedURL, urllib2.URLError): return "" format = get_format_from_http_response(response) if format: return format if CFG_HAS_MAGIC: ## Last solution: let's download the remote resource ## and use the Python magic library to guess the extension try: filename = download_url(url, format='') magic_cookie = _get_magic_cookies()[magic.MAGIC_MIME] mimetype = magic_cookie.file(filename) os.remove(filename) ext = _mimes.guess_extension(mimetype) if ext: return normalize_format(ext) except Exception: pass return "" _docname_re = re.compile(r'[^-\w.]*') def normalize_docname(docname): """ Normalize the docname. At the moment the normalization is just returning the same string. @param docname: the docname to be normalized. @type docname: string @return: the normalized docname. @rtype: string """ #return _docname_re.sub('', docname) return docname def normalize_version(version): """ Normalize the version. The version can be either an integer or the keyword 'all'. Any other value will be transformed into the empty string. @param version: the version (either a number or 'all'). @type version: integer or string @return: the normalized version. @rtype: string """ try: int(version) except ValueError: if version.lower().strip() == 'all': return 'all' else: return '' return str(version) def compose_file(dirname, docname, extension, subformat=None, version=None): """ Construct back a fullpath given the separate components. """ if version: version = ";%i" % int(version) else: version = "" if subformat: if not subformat.startswith(";"): subformat = ";%s" % subformat else: subformat = "" if extension and not extension.startswith("."): extension = ".%s" % extension return os.path.join(dirname, docname + extension + subformat + version) def compose_format(extension, subformat=None): """ Construct the format string """ if not extension.startswith("."): extension = ".%s" % extension if subformat: if not subformat.startswith(";"): subformat = ";%s" % subformat else: subformat = "" return extension + subformat def decompose_file(afile, skip_version=False, only_known_extensions=False, allow_subformat=True): """ Decompose a file/path into its components dirname, basename and extension. >>> decompose_file('/tmp/foo.tar.gz') ('/tmp', 'foo', '.tar.gz') >>> decompose_file('/tmp/foo.tar.gz;1', skip_version=True) ('/tmp', 'foo', '.tar.gz') >>> decompose_file('http://www.google.com/index.html') ('http://www.google.com', 'index', '.html') @param afile: the path/name of a file. @type afile: string @param skip_version: whether to skip a trailing ";version". @type skip_version: bool @param only_known_extensions: whether to strip out only known extensions or to consider as extension anything that follows a dot. 
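# Illustration, not part of the patch: compose_file()/compose_format() above
# rebuild what decompose_file() takes apart; the values are invented.
compose_file('/tmp', 'foo', '.tar.gz', subformat='icon', version=2)
# -> '/tmp/foo.tar.gz;icon;2'
compose_format('.gif', subformat='icon')    # -> '.gif;icon'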
    @type only_known_extensions: bool
    @param allow_subformat: whether to consider also subformats as part of
        the extension.
    @type allow_subformat: bool
    @return: a tuple with the directory name, the docname and extension.
    @rtype: (dirname, docname, extension)
    @note: if a URL is provided, the scheme will be part of the dirname.
    @see: L{file_strip_ext} for the algorithm used to retrieve the extension.
    """
    if skip_version:
        version = afile.split(';')[-1]
        try:
            int(version)
            afile = afile[:-len(version)-1]
        except ValueError:
            pass
    basename = os.path.basename(afile)
    dirname = afile[:-len(basename)-1]
    base = file_strip_ext(basename, only_known_extensions=only_known_extensions, allow_subformat=allow_subformat)
    extension = basename[len(base) + 1:]
    if extension:
        extension = '.' + extension
    return (dirname, base, extension)

def decompose_file_with_version(afile):
    """
    Decompose a file into dirname, basename, extension and version.

    >>> decompose_file_with_version('/tmp/foo.tar.gz;1')
    ('/tmp', 'foo', '.tar.gz', 1)

    @param afile: the path/name of a file.
    @type afile: string
    @return: a tuple with the directory name, the docname, extension and
        version.
    @rtype: (dirname, docname, extension, version)
    @raise ValueError: if the version is not present in C{afile}.
    @note: if a URL is provided, the scheme will be part of the dirname.
    """
    version_str = afile.split(';')[-1]
    version = int(version_str)
    afile = afile[:-len(version_str)-1]
    basename = os.path.basename(afile)
    dirname = afile[:-len(basename)-1]
    base = file_strip_ext(basename)
    extension = basename[len(base) + 1:]
    if extension:
        extension = '.' + extension
    return (dirname, base, extension, version)

def get_subformat_from_format(format):
    """
    @return: the subformat if any.
    @rtype: string

    >>> get_subformat_from_format('foo;bar')
    'bar'
    >>> get_subformat_from_format('foo')
    ''
    """
    try:
        return format[format.rindex(';') + 1:]
    except ValueError:
        return ''

def get_superformat_from_format(format):
    """
    @return: the superformat if any.
    @rtype: string

    >>> get_superformat_from_format('foo;bar')
    'foo'
    >>> get_superformat_from_format('foo')
    'foo'
    """
    try:
        return format[:format.rindex(';')]
    except ValueError:
        return format

def propose_next_docname(docname):
    """
    Given a I{docname}, suggest a new I{docname} (useful when trying to
    generate a unique I{docname}).

    >>> propose_next_docname('foo')
    'foo_1'
    >>> propose_next_docname('foo_1')
    'foo_2'
    >>> propose_next_docname('foo_10')
    'foo_11'

    @param docname: the base docname.
    @type docname: string
    @return: the next possible docname based on the given one.
    @rtype: string
    """
    if '_' in docname:
        split_docname = docname.split('_')
        try:
            split_docname[-1] = str(int(split_docname[-1]) + 1)
            docname = '_'.join(split_docname)
        except ValueError:
            docname += '_1'
    else:
        docname += '_1'
    return docname

class BibRecDocs:
    """
    This class represents all the files attached to one record.

    @param recid: the record identifier.
    @type recid: integer
    @param deleted_too: whether to consider deleted documents as normal
        documents (useful when trying to recover deleted information).
    @type deleted_too: bool
    @param human_readable: whether numbers should be printed in human readable
        format (e.g. 2048 bytes -> 2Kb)
    @ivar id: the record identifier as passed to the constructor.
    @type id: integer
    @ivar human_readable: the human_readable flag as passed to the
        constructor.
    @type human_readable: bool
    @ivar deleted_too: the deleted_too flag as passed to the constructor.
    @type deleted_too: bool
    @ivar bibdocs: the list of documents attached to the record.
@type bibdocs: list of BibDoc """ def __init__(self, recid, deleted_too=False, human_readable=False): try: self.id = int(recid) except ValueError: raise ValueError("BibRecDocs: recid is %s but must be an integer." % repr(recid)) self.human_readable = human_readable self.deleted_too = deleted_too self.bibdocs = [] self.build_bibdoc_list() def __repr__(self): """ @return: the canonical string representation of the C{BibRecDocs}. @rtype: string """ return 'BibRecDocs(%s%s%s)' % (self.id, self.deleted_too and ', True' or '', self.human_readable and ', True' or '' ) def __str__(self): """ @return: an easy to be I{grepped} string representation of the whole C{BibRecDocs} content. @rtype: string """ out = '%i::::total bibdocs attached=%i\n' % (self.id, len(self.bibdocs)) out += '%i::::total size latest version=%s\n' % (self.id, nice_size(self.get_total_size_latest_version())) out += '%i::::total size all files=%s\n' % (self.id, nice_size(self.get_total_size())) for bibdoc in self.bibdocs: out += str(bibdoc) return out def empty_p(self): """ @return: True when the record has no attached documents. @rtype: bool """ return len(self.bibdocs) == 0 def deleted_p(self): """ @return: True if the corresponding record has been deleted. @rtype: bool """ from invenio.search_engine import record_exists return record_exists(self.id) == -1 def get_xml_8564(self): """ Return a snippet of I{MARCXML} representing the I{8564} fields corresponding to the current state. @return: the MARCXML representation. @rtype: string """ from invenio.search_engine import get_record out = '' record = get_record(self.id) fields = record_get_field_instances(record, '856', '4', ' ') for field in fields: urls = field_get_subfield_values(field, 'u') if urls and not bibdocfile_url_p(urls[0]): out += '\t\n' for subfield, value in field_get_subfield_instances(field): out += '\t\t%s\n' % (subfield, encode_for_xml(value)) out += '\t\n' for afile in self.list_latest_files(list_hidden=False): out += '\t\n' url = afile.get_url() description = afile.get_description() comment = afile.get_comment() if url: out += '\t\t%s\n' % encode_for_xml(url) if description: out += '\t\t%s\n' % encode_for_xml(description) if comment: out += '\t\t%s\n' % encode_for_xml(comment) out += '\t\n' return out def get_total_size_latest_version(self): """ Returns the total size used on disk by all the files belonging to this record and corresponding to the latest version. @return: the total size. @rtype: integer """ size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size_latest_version() return size def get_total_size(self): """ Return the total size used on disk of all the files belonging to this record of any version (not only the last as in L{get_total_size_latest_version}). @return: the total size. @rtype: integer """ size = 0 for bibdoc in self.bibdocs: size += bibdoc.get_total_size() return size def build_bibdoc_list(self): """ This method must be called everytime a I{bibdoc} is added, removed or modified. 
""" self.bibdocs = [] if self.deleted_too: res = run_sql("""SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s ORDER BY docname ASC""", (self.id,)) else: res = run_sql("""SELECT id_bibdoc, type FROM bibrec_bibdoc JOIN bibdoc ON id=id_bibdoc WHERE id_bibrec=%s AND status<>'DELETED' ORDER BY docname ASC""", (self.id,)) for row in res: cur_doc = BibDoc(docid=row[0], recid=self.id, doctype=row[1], human_readable=self.human_readable) self.bibdocs.append(cur_doc) def list_bibdocs(self, doctype=''): """ Returns the list all bibdocs object belonging to a recid. If C{doctype} is set, it returns just the bibdocs of that doctype. @param doctype: the optional doctype. @type doctype: string @return: the list of bibdocs. @rtype: list of BibDoc """ if not doctype: return self.bibdocs else: return [bibdoc for bibdoc in self.bibdocs if doctype == bibdoc.doctype] def get_bibdoc_names(self, doctype=''): """ Returns all the names of the documents associated with the bibdoc. If C{doctype} is set, restrict the result to all the matching doctype. @param doctype: the optional doctype. @type doctype: string @return: the list of document names. @rtype: list of string """ return [bibdoc.docname for bibdoc in self.list_bibdocs(doctype)] def propose_unique_docname(self, docname): """ Given C{docname}, return a new docname that is not already attached to the record. @param docname: the reference docname. @type docname: string @return: a docname not already attached. @rtype: string """ docname = normalize_docname(docname) goodname = docname i = 1 while goodname in self.get_bibdoc_names(): i += 1 goodname = "%s_%s" % (docname, i) return goodname def merge_bibdocs(self, docname1, docname2): """ This method merge C{docname2} into C{docname1}. 1. Given all the formats of the latest version of the files attached to C{docname2}, these files are added as new formats into C{docname1}. 2. C{docname2} is marked as deleted. @raise InvenioWebSubmitFileError: if at least one format in C{docname2} already exists in C{docname1}. (In this case the two bibdocs are preserved) @note: comments and descriptions are also copied. @note: if C{docname2} has a I{restriction}(i.e. if the I{status} is set) and C{docname1} doesn't, the restriction is imported. """ bibdoc1 = self.get_bibdoc(docname1) bibdoc2 = self.get_bibdoc(docname2) ## Check for possibility for bibdocfile in bibdoc2.list_latest_files(): format = bibdocfile.get_format() if bibdoc1.format_already_exists_p(format): raise InvenioWebSubmitFileError('Format %s already exists in bibdoc %s of record %s. It\'s impossible to merge bibdoc %s into it.' % (format, docname1, self.id, docname2)) ## Importing restriction if needed. restriction1 = bibdoc1.get_status() restriction2 = bibdoc2.get_status() if restriction2 and not restriction1: bibdoc1.set_status(restriction2) ## Importing formats for bibdocfile in bibdoc2.list_latest_files(): format = bibdocfile.get_format() comment = bibdocfile.get_comment() description = bibdocfile.get_description() bibdoc1.add_file_new_format(bibdocfile.get_full_path(), description=description, comment=comment, format=format) ## Finally deleting old bibdoc2 bibdoc2.delete() self.build_bibdoc_list() def get_docid(self, docname): """ @param docname: the document name. @type docname: string @return: the identifier corresponding to the given C{docname}. @rtype: integer @raise InvenioWebSubmitFileError: if the C{docname} does not corresponds to a document attached to this record. 
""" for bibdoc in self.bibdocs: if bibdoc.docname == docname: return bibdoc.id raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \ "docname '%s'" % (self.id, docname) def get_docname(self, docid): """ @param docid: the document identifier. @type docid: integer @return: the name of the document corresponding to the given document identifier. @rtype: string @raise InvenioWebSubmitFileError: if the C{docid} does not corresponds to a document attached to this record. """ for bibdoc in self.bibdocs: if bibdoc.id == docid: return bibdoc.docname raise InvenioWebSubmitFileError, "Recid '%s' is not connected with a " \ "docid '%s'" % (self.id, docid) def has_docname_p(self, docname): """ @param docname: the document name, @type docname: string @return: True if a document with the given name is attached to this record. @rtype: bool """ for bibdoc in self.bibdocs: if bibdoc.docname == docname: return True return False def get_bibdoc(self, docname): """ @return: the bibdoc with a particular docname associated with this recid""" for bibdoc in self.bibdocs: if bibdoc.docname == docname: return bibdoc raise InvenioWebSubmitFileError, "Recid '%s' is not connected with " \ " docname '%s'" % (self.id, docname) def delete_bibdoc(self, docname): """ Deletes the document with the specified I{docname}. @param docname: the document name. @type docname: string """ for bibdoc in self.bibdocs: if bibdoc.docname == docname: bibdoc.delete() self.build_bibdoc_list() def add_bibdoc(self, doctype="Main", docname='file', never_fail=False): """ Add a new empty document object (a I{bibdoc}) to the list of documents of this record. @param doctype: the document type. @type doctype: string @param docname: the document name. @type docname: string @param never_fail: if True, this procedure will not fail, even if a document with the given name is already attached to this record. In this case a new name will be generated (see L{propose_unique_docname}). @type never_fail: bool @return: the newly created document object. @rtype: BibDoc @raise InvenioWebSubmitFileError: in case of any error. """ try: docname = normalize_docname(docname) if never_fail: docname = self.propose_unique_docname(docname) if docname in self.get_bibdoc_names(): raise InvenioWebSubmitFileError, "%s has already a bibdoc with docname %s" % (self.id, docname) else: bibdoc = BibDoc(recid=self.id, doctype=doctype, docname=docname, human_readable=self.human_readable) self.build_bibdoc_list() return bibdoc except Exception, e: register_exception() raise InvenioWebSubmitFileError(str(e)) def add_new_file(self, fullpath, doctype="Main", docname=None, never_fail=False, description=None, comment=None, format=None, flags=None): """ Directly add a new file to this record. Adds a new file with the following policy: - if the C{docname} is not set it is retrieved from the name of the file. - If a bibdoc with the given docname doesn't already exist, it is created and the file is added to it. - It it exist but it doesn't contain the format that is being added, the new format is added. - If the format already exists then if C{never_fail} is True a new bibdoc is created with a similar name but with a progressive number as a suffix and the file is added to it (see L{propose_unique_docname}). @param fullpath: the filesystme path of the document to be added. @type fullpath: string @param doctype: the type of the document. @type doctype: string @param docname: the document name. 
@type docname: string @param never_fail: if True, this procedure will not fail, even if a document with the given name is already attached to this record. In this case a new name will be generated (see L{propose_unique_docname}). @type never_fail: bool @param description: an optional description of the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be guessed (see L{guess_format_from_url}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @return: the elaborated document object. @rtype: BibDoc @raise InvenioWebSubmitFileError: in case of error. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] docname = normalize_docname(docname) try: bibdoc = self.get_bibdoc(docname) except InvenioWebSubmitFileError: # bibdoc doesn't already exists! bibdoc = self.add_bibdoc(doctype, docname, False) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, flags=flags) self.build_bibdoc_list() else: try: bibdoc.add_file_new_format(fullpath, description=description, comment=comment, format=format, flags=flags) self.build_bibdoc_list() except InvenioWebSubmitFileError, e: # Format already exist! if never_fail: bibdoc = self.add_bibdoc(doctype, docname, True) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, flags=flags) self.build_bibdoc_list() else: raise return bibdoc def add_new_version(self, fullpath, docname=None, description=None, comment=None, format=None, flags=None): """ Adds a new file to an already existent document object as a new version. @param fullpath: the filesystem path of the file to be added. @type fullpath: string @param docname: the document name. If not specified it will be extracted from C{fullpath} (see L{decompose_file}). @type docname: string @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be guessed (see L{guess_format_from_url}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @return: the elaborated document object. @rtype: BibDoc @raise InvenioWebSubmitFileError: in case of error. @note: previous files associated with the same document will be considered obsolete. """ if docname is None: docname = decompose_file(fullpath)[1] if format is None: format = decompose_file(fullpath)[2] if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') bibdoc = self.get_bibdoc(docname=docname) bibdoc.add_file_new_version(fullpath, description=description, comment=comment, format=format, flags=flags) self.build_bibdoc_list() return bibdoc def add_new_format(self, fullpath, docname=None, description=None, comment=None, format=None, flags=None): """ Adds a new file to an already existent document object as a new format. @param fullpath: the filesystem path of the file to be added. @type fullpath: string @param docname: the document name. If not specified it will be extracted from C{fullpath} (see L{decompose_file}). 
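# Illustration, not part of the patch: the typical attach/update flow with
# the BibRecDocs methods documented above. The recid, paths and docname
# are invented for the example.
recdocs = BibRecDocs(123)
recdocs.add_new_file('/tmp/thesis.pdf', doctype='Main', docname='thesis')
recdocs.add_new_version('/tmp/thesis_v2.pdf', docname='thesis')   # older files become obsolete
recdocs.add_new_format('/tmp/thesis.ps', docname='thesis')        # same version, extra extension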
        @type docname: string
        @param description: an optional description for the file.
        @type description: string
        @param comment: an optional comment for the file.
        @type comment: string
        @param format: the extension of the file. If not specified it will
            be guessed (see L{guess_format_from_url}).
        @type format: string
        @param flags: a set of flags to be associated with the file (see
            L{CFG_BIBDOCFILE_AVAILABLE_FLAGS})
        @type flags: list of string
        @return: the elaborated document object.
        @rtype: BibDoc
        @raise InvenioWebSubmitFileError: in case the same format already
            exists.
        """
        if docname is None:
            docname = decompose_file(fullpath)[1]
        if format is None:
            format = decompose_file(fullpath)[2]
        if flags is None:
            flags = []
        if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags:
            flags.append('PDF/A')
        bibdoc = self.get_bibdoc(docname=docname)
        bibdoc.add_file_new_format(fullpath, description=description, comment=comment, format=format, flags=flags)
        self.build_bibdoc_list()
        return bibdoc

    def list_latest_files(self, doctype='', list_hidden=True):
        """
        Returns a list of the latest files.

        @param doctype: if set, only documents of the given type will be
            listed.
        @type doctype: string
        @param list_hidden: if True, will list also files with the C{HIDDEN}
            flag being set.
        @type list_hidden: bool
        @return: the list of latest files.
        @rtype: list of BibDocFile
        """
        docfiles = []
        for bibdoc in self.list_bibdocs(doctype):
            docfiles += bibdoc.list_latest_files(list_hidden=list_hidden)
        return docfiles

    def display(self, docname="", version="", doctype="", ln=CFG_SITE_LANG, verbose=0, display_hidden=True):
        """
        Returns an HTML representation of the attached documents.

        @param docname: if set, include only the requested document.
        @type docname: string
        @param version: if not set, only the last version will be displayed.
            If 'all', all versions will be displayed.
        @type version: string (integer or 'all')
        @param doctype: if set, include only documents of the requested type.
        @type doctype: string
        @param ln: the language code.
        @type ln: string
        @param verbose: if greater than 0, includes debug information.
        @type verbose: integer
        @param display_hidden: whether to include hidden files as well.
        @type display_hidden: bool
        @return: the formatted representation.
        @rtype: HTML string
        """
        t = ""
        if docname:
            try:
                bibdocs = [self.get_bibdoc(docname)]
            except InvenioWebSubmitFileError:
                bibdocs = self.list_bibdocs(doctype)
        else:
            bibdocs = self.list_bibdocs(doctype)
        if bibdocs:
            types = list_types_from_array(bibdocs)
            fulltypes = []
            for mytype in types:
                if mytype in ('Plot', 'PlotMisc'):
                    # FIXME: quick hack to ignore plot-like doctypes
                    # on Files tab
                    continue
                fulltype = {
                    'name' : mytype,
                    'content' : [],
                }
                for bibdoc in bibdocs:
                    if mytype == bibdoc.get_type():
                        fulltype['content'].append(bibdoc.display(version, ln=ln, display_hidden=display_hidden))
                fulltypes.append(fulltype)
            if verbose >= 9:
                verbose_files = str(self)
            else:
                verbose_files = ''
            t = websubmit_templates.tmpl_bibrecdoc_filelist(
                  ln=ln,
                  types = fulltypes,
                  verbose_files=verbose_files
                )
        return t

    def fix(self, docname):
        """
        Algorithm that transforms a broken/old bibdoc into a coherent one.
        Think of it as being the fsck of BibDocs.
            - All the files in the bibdoc directory will be renamed according
              to the document name. Proper .recid, .type, .md5 files will be
              created/updated.
            - In case of more than one file with the same format version a
              new bibdoc will be created in order to put those files.
        @param docname: the document name that needs to be fixed.
@type docname: string @return: the list of newly created bibdocs if any. @rtype: list of BibDoc @raise InvenioWebSubmitFileError: in case of issues that can not be fixed automatically. """ bibdoc = self.get_bibdoc(docname) versions = {} res = [] new_bibdocs = [] # List of files with the same version/format of # existing file which need new bibdoc. counter = 0 zero_version_bug = False if os.path.exists(bibdoc.basedir): for filename in os.listdir(bibdoc.basedir): if filename[0] != '.' and ';' in filename: name, version = filename.split(';') try: version = int(version) except ValueError: # Strange name register_exception() raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. After the ';' there must be an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." % (filename, bibdoc.basedir) if version == 0: zero_version_bug = True format = name[len(file_strip_ext(name)):] format = normalize_format(format) if not versions.has_key(version): versions[version] = {} new_name = 'FIXING-%s-%s' % (str(counter), name) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, new_name), e) if versions[version].has_key(format): new_bibdocs.append((new_name, version)) else: versions[version][format] = new_name counter += 1 elif filename[0] != '.': # Strange name register_exception() raise InvenioWebSubmitFileError, "A file called %s exists under %s. This is not a valid name. There should be a ';' followed by an integer representing the file version. Please, manually fix this file either by renaming or by deleting it." 
% (filename, bibdoc.basedir) else: # we create the corresponding storage directory old_umask = os.umask(022) os.makedirs(bibdoc.basedir) # and save the father record id if it exists try: if self.id != "": recid_fd = open("%s/.recid" % bibdoc.basedir, "w") recid_fd.write(str(self.id)) recid_fd.close() if bibdoc.doctype != "": type_fd = open("%s/.type" % bibdoc.basedir, "w") type_fd.write(str(bibdoc.doctype)) type_fd.close() except Exception, e: register_exception() raise InvenioWebSubmitFileError, e os.umask(old_umask) if not versions: bibdoc.delete() else: for version, formats in versions.iteritems(): if zero_version_bug: version += 1 for format, filename in formats.iteritems(): destination = '%s%s;%i' % (docname, format, version) try: shutil.move('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in renaming '%s' to '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), '%s/%s' % (bibdoc.basedir, destination), e) try: recid_fd = open("%s/.recid" % bibdoc.basedir, "w") recid_fd.write(str(self.id)) recid_fd.close() type_fd = open("%s/.type" % bibdoc.basedir, "w") type_fd.write(str(bibdoc.doctype)) type_fd.close() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in creating .recid and .type file for '%s' folder: '%s'" % (bibdoc.basedir, e) self.build_bibdoc_list() res = [] for (filename, version) in new_bibdocs: if zero_version_bug: version += 1 new_bibdoc = self.add_bibdoc(doctype=bibdoc.doctype, docname=docname, never_fail=True) new_bibdoc.add_file_new_format('%s/%s' % (bibdoc.basedir, filename), version) res.append(new_bibdoc) try: os.remove('%s/%s' % (bibdoc.basedir, filename)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in removing '%s': '%s'" % ('%s/%s' % (bibdoc.basedir, filename), e) Md5Folder(bibdoc.basedir).update(only_new=False) bibdoc._build_file_list() self.build_bibdoc_list() for bibdoc in self.bibdocs: if not run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (bibdoc.id,)): ## Import from MARC only if the bibdoc has never had ## its more_info initialized. try: bibdoc.import_descriptions_and_comments_from_marc() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Error in importing description and comment from %s for record %s: %s" % (repr(bibdoc), self.id, e) return res def check_format(self, docname): """ Check for any format-related issues. In case L{CFG_WEBSUBMIT_ADDITIONAL_KNOWN_FILE_EXTENSIONS} is altered or the Python version changes, it might happen that a docname contains files which are no longer of the form docname + .format ; version, simply because the .format is now recognized (and it was not before, so it was contained in the docname). This algorithm verifies whether a fix is necessary (see L{fix_format}). @param docname: the document name whose formats should be verified. @type docname: string @return: True if the format is correct. False if a fix is needed. @rtype: bool @raise InvenioWebSubmitFileError: in case of any error.
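        A hedged usage sketch (the recid 123 and docname 'thesis' below
        are hypothetical)::

            bibrecdocs = BibRecDocs(123)
            if not bibrecdocs.check_format('thesis'):
                bibrecdocs.fix_format('thesis')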
""" bibdoc = self.get_bibdoc(docname) correct_docname = decompose_file(docname + '.pdf')[1] if docname != correct_docname: return False for filename in os.listdir(bibdoc.basedir): if not filename.startswith('.'): try: dummy, dummy, format, version = decompose_file_with_version(filename) except Exception: raise InvenioWebSubmitFileError('Incorrect filename "%s" for docname %s for recid %i' % (filename, docname, self.id)) if '%s%s;%i' % (correct_docname, format, version) != filename: return False return True def check_duplicate_docnames(self): """ Check wethever the record is connected with at least tho documents with the same name. @return: True if everything is fine. @rtype: bool """ docnames = set() for docname in self.get_bibdoc_names(): if docname in docnames: return False else: docnames.add(docname) return True def uniformize_bibdoc(self, docname): """ This algorithm correct wrong file name belonging to a bibdoc. @param docname: the document name whose formats should be verified. @type docname: string """ bibdoc = self.get_bibdoc(docname) for filename in os.listdir(bibdoc.basedir): if not filename.startswith('.'): try: dummy, dummy, format, version = decompose_file_with_version(filename) except ValueError: register_exception(alert_admin=True, prefix= "Strange file '%s' is stored in %s" % (filename, bibdoc.basedir)) else: os.rename(os.path.join(bibdoc.basedir, filename), os.path.join(bibdoc.basedir, '%s%s;%i' % (docname, format, version))) Md5Folder(bibdoc.basedir).update() bibdoc.touch() bibdoc._build_file_list('rename') def fix_format(self, docname, skip_check=False): """ Fixes format related inconsistencies. @param docname: the document name whose formats should be verified. @type docname: string @param skip_check: if True assume L{check_format} has already been called and the need for fix has already been found. If False, will implicitly call L{check_format} and skip fixing if no error is found. @type skip_check: bool @return: in case merging two bibdocs is needed but it's not possible. @rtype: bool """ if not skip_check: if self.check_format(docname): return True bibdoc = self.get_bibdoc(docname) correct_docname = decompose_file(docname + '.pdf')[1] need_merge = False if correct_docname != docname: need_merge = self.has_docname_p(correct_docname) if need_merge: proposed_docname = self.propose_unique_docname(correct_docname) run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (proposed_docname, bibdoc.id)) self.build_bibdoc_list() self.uniformize_bibdoc(proposed_docname) try: self.merge_bibdocs(docname, proposed_docname) except InvenioWebSubmitFileError: return False else: run_sql('UPDATE bibdoc SET docname=%s WHERE id=%s', (correct_docname, bibdoc.id)) self.build_bibdoc_list() self.uniformize_bibdoc(correct_docname) else: self.uniformize_bibdoc(docname) return True def fix_duplicate_docnames(self, skip_check=False): """ Algotirthm to fix duplicate docnames. If a record is connected with at least two bibdoc having the same docname, the algorithm will try to merge them. @param skip_check: if True assume L{check_duplicate_docnames} has already been called and the need for fix has already been found. If False, will implicitly call L{check_duplicate_docnames} and skip fixing if no error is found. 
@type skip_check: bool """ if not skip_check: if self.check_duplicate_docnames(): return docnames = set() for bibdoc in self.list_bibdocs(): docname = bibdoc.docname if docname in docnames: new_docname = self.propose_unique_docname(bibdoc.docname) bibdoc.change_name(new_docname) self.merge_bibdocs(docname, new_docname) docnames.add(docname) def check_file_exists(self, path): """ Check if a file with the same content as the file pointed to by C{path} is already attached to this record. @param path: the file to be checked against. @type path: string @return: True if a file with the requested content is already attached to the record. @rtype: bool """ # Let's consider all the latest files for bibdoc in self.list_bibdocs(): if bibdoc.check_file_exists(path): return True return False class BibDoc: """ This class represents one document (i.e. a set of files with different formats and with versioning information) that constitutes a piece of information. To instantiate a new document, the recid and the docname are mandatory. To instantiate an already existing document, either the recid and docname or the docid alone are sufficient to retrieve it. @param docid: the document identifier. @type docid: integer @param recid: the record identifier of the record to which this document belongs. If the C{docid} is specified the C{recid} is automatically retrieved from the database. @type recid: integer @param docname: the document name. @type docname: string @param doctype: the document type (used when instantiating a new document). @type doctype: string @param human_readable: whether sizes should be represented in a human readable format. @type human_readable: bool @raise InvenioWebSubmitFileError: in case of error. """ def __init__ (self, docid=None, recid=None, docname=None, doctype='Main', human_readable=False): """Constructor of a bibdoc. At least the docid or the recid/docname pair is needed.""" # docid is known, the document already exists if docname: docname = normalize_docname(docname) self.docfiles = [] self.md5s = None self.human_readable = human_readable if docid: if not recid: res = run_sql("SELECT id_bibrec,type FROM bibrec_bibdoc WHERE id_bibdoc=%s LIMIT 1", (docid,), 1) if res: recid = res[0][0] doctype = res[0][1] else: warn("Docid %s is an orphan" % docid) else: res = run_sql("SELECT type FROM bibrec_bibdoc WHERE id_bibrec=%s AND id_bibdoc=%s LIMIT 1", (recid, docid,), 1) if res: doctype = res[0][0] else: #this bibdoc isn't associated with the corresponding bibrec. raise InvenioWebSubmitFileError, "Docid %s is not associated with the recid %s" % (docid, recid) # gather the other information res = run_sql("SELECT id,status,docname,creation_date,modification_date,text_extraction_date,more_info FROM bibdoc WHERE id=%s LIMIT 1", (docid,), 1) if res: self.cd = res[0][3] self.md = res[0][4] self.td = res[0][5] self.recid = recid self.docname = res[0][2] self.id = docid self.status = res[0][1] self.more_info = BibDocMoreInfo(docid, blob_to_string(res[0][6])) self.basedir = _make_base_dir(self.id) self.doctype = doctype else: # this bibdoc doesn't exist raise InvenioWebSubmitFileError, "The docid %s does not exist."
% docid # else it is a new document else: if not docname: raise InvenioWebSubmitFileError, "You should specify the docname when creating a new bibdoc" else: self.recid = recid self.doctype = doctype self.docname = docname self.status = '' if recid: res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s LIMIT 1", (recid, docname), 1) if res: raise InvenioWebSubmitFileError("A bibdoc called %s already exists for recid %s" % (docname, recid)) self.id = run_sql("INSERT INTO bibdoc (status,docname,creation_date,modification_date) " "values(%s,%s,NOW(),NOW())", (self.status, docname)) if self.id: # we link the document to the record if a recid was # specified self.more_info = BibDocMoreInfo(self.id) res = run_sql("SELECT creation_date, modification_date, text_extraction_date FROM bibdoc WHERE id=%s", (self.id,)) self.cd = res[0][0] self.md = res[0][1] self.td = res[0][2] else: raise InvenioWebSubmitFileError, "New docid cannot be created" try: self.basedir = _make_base_dir(self.id) # we create the corresponding storage directory if not os.path.exists(self.basedir): old_umask = os.umask(022) os.makedirs(self.basedir) # and save the father record id if it exists try: if self.recid: recid_fd = open("%s/.recid" % self.basedir, "w") recid_fd.write(str(self.recid)) recid_fd.close() if self.doctype: type_fd = open("%s/.type" % self.basedir, "w") type_fd.write(str(self.doctype)) type_fd.close() except Exception, e: register_exception(alert_admin=True) raise InvenioWebSubmitFileError, e os.umask(old_umask) if self.recid: run_sql("INSERT INTO bibrec_bibdoc (id_bibrec, id_bibdoc, type) VALUES (%s,%s,%s)", (recid, self.id, self.doctype,)) except Exception, e: run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, )) run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, )) register_exception(alert_admin=True) raise InvenioWebSubmitFileError, e # build list of attached files self._build_file_list('init') def __repr__(self): """ @return: the canonical string representation of the C{BibDoc}. @rtype: string """ return 'BibDoc(%s, %s, %s, %s, %s)' % (repr(self.id), repr(self.recid), repr(self.docname), repr(self.doctype), repr(self.human_readable)) def __str__(self): """ @return: an easy to be I{grepped} string representation of the whole C{BibDoc} content. 
@rtype: string """ out = '%s:%i:::docname=%s\n' % (self.recid or '', self.id, self.docname) out += '%s:%i:::doctype=%s\n' % (self.recid or '', self.id, self.doctype) out += '%s:%i:::status=%s\n' % (self.recid or '', self.id, self.status) out += '%s:%i:::basedir=%s\n' % (self.recid or '', self.id, self.basedir) out += '%s:%i:::creation date=%s\n' % (self.recid or '', self.id, self.cd) out += '%s:%i:::modification date=%s\n' % (self.recid or '', self.id, self.md) out += '%s:%i:::text extraction date=%s\n' % (self.recid or '', self.id, self.td) out += '%s:%i:::total file attached=%s\n' % (self.recid or '', self.id, len(self.docfiles)) if self.human_readable: out += '%s:%i:::total size latest version=%s\n' % (self.recid or '', self.id, nice_size(self.get_total_size_latest_version())) out += '%s:%i:::total size all files=%s\n' % (self.recid or '', self.id, nice_size(self.get_total_size())) else: out += '%s:%i:::total size latest version=%s\n' % (self.recid or '', self.id, self.get_total_size_latest_version()) out += '%s:%i:::total size all files=%s\n' % (self.recid or '', self.id, self.get_total_size()) for docfile in self.docfiles: out += str(docfile) return out def format_already_exists_p(self, format): """ @param format: a format to be checked. @type format: string @return: True if a file of the given format already exists among the latest files. @rtype: bool """ format = normalize_format(format) for afile in self.list_latest_files(): if format == afile.get_format(): return True return False def get_status(self): """ @return: the status information. @rtype: string """ return self.status def get_text(self, version=None): """ @param version: the requested version. If not set, the latest version will be used. @type version: integer @return: the textual content corresponding to the specified version of the document. @rtype: string """ if version is None: version = self.get_latest_version() if self.has_text(version): return open(os.path.join(self.basedir, '.text;%i' % version)).read() else: return "" def get_text_path(self, version=None): """ @param version: the requested version. If not set, the latest version will be used. @type version: int @return: the full path to the textual content corresponding to the specified version of the document. @rtype: string """ if version is None: version = self.get_latest_version() if self.has_text(version): return os.path.join(self.basedir, '.text;%i' % version) else: return "" def extract_text(self, version=None, perform_ocr=False, ln='en'): """ Try what is necessary to extract the textual information of a document. @param version: the version of the document for which text is required. If not specified the text will be retrieved from the last version. @type version: integer @param perform_ocr: whether to perform OCR. @type perform_ocr: bool @param ln: a two letter language code to give as a hint to the OCR procedure. @type ln: string @raise InvenioWebSubmitFileError: in case of error. @note: the text is extracted and cached for later use. Use L{get_text} to retrieve it. """ from invenio.websubmit_file_converter import get_best_format_to_extract_text_from, convert_file, InvenioWebSubmitFileConverterError if version is None: version = self.get_latest_version() docfiles = self.list_version_files(version) ## We try to extract text only from original or OCRed documents. 
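## (A hedged reading of the filter below: a docfile is kept when it
## either does not carry the CONVERTED flag at all, or, if converted,
## it also carries the OCRED flag, i.e. it is an OCR output rather
## than a mere derived rendition.)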
filenames = [docfile.get_full_path() for docfile in docfiles if 'CONVERTED' not in docfile.flags or 'OCRED' in docfile.flags] try: filename = get_best_format_to_extract_text_from(filenames) except InvenioWebSubmitFileConverterError: ## We fall back on considering all the documents filenames = [docfile.get_full_path() for docfile in docfiles] try: filename = get_best_format_to_extract_text_from(filenames) except InvenioWebSubmitFileConverterError: open(os.path.join(self.basedir, '.text;%i' % version), 'w').write('') return try: convert_file(filename, os.path.join(self.basedir, '.text;%i' % version), '.txt', perform_ocr=perform_ocr, ln=ln) if version == self.get_latest_version(): run_sql("UPDATE bibdoc SET text_extraction_date=NOW() WHERE id=%s", (self.id, )) except InvenioWebSubmitFileConverterError, e: register_exception(alert_admin=True, prefix="Error in extracting text from bibdoc %i, version %i" % (self.id, version)) raise InvenioWebSubmitFileError, str(e) def touch(self): """ Update the modification time of the bibdoc (as in the UNIX command C{touch}). """ run_sql('UPDATE bibdoc SET modification_date=NOW() WHERE id=%s', (self.id, )) #if self.recid: #run_sql('UPDATE bibrec SET modification_date=NOW() WHERE id=%s', (self.recid, )) def set_status(self, new_status): """ Set a new status. A document with status information is a restricted document that can be accessed only by users who have an authorization for the I{viewrestrdoc} WebAccess action with the keyword C{status} having value C{new_status}. @param new_status: the new status. If empty the document will be unrestricted. @type new_status: string @raise InvenioWebSubmitFileError: in case the reserved word 'DELETED' is used. """ if new_status != KEEP_OLD_VALUE: if new_status == 'DELETED': raise InvenioWebSubmitFileError('DELETED is a reserved word and can not be used for setting the status') run_sql('UPDATE bibdoc SET status=%s WHERE id=%s', (new_status, self.id)) self.status = new_status self.touch() self._build_file_list() def add_file_new_version(self, filename, description=None, comment=None, format=None, flags=None): """ Add a new version of a file. If no physical file is attached to the document yet, the given file will have version 1. Otherwise the new file will have the current version number plus one. @param filename: the local path of the file. @type filename: string @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be retrieved from the filename (see L{decompose_file}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @raise InvenioWebSubmitFileError: in case of error.
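        A minimal sketch of the intended call pattern (the recid,
        docname and path are hypothetical)::

            bibdoc = BibRecDocs(123).get_bibdoc('thesis')
            bibdoc.add_file_new_version('/tmp/thesis-v2.pdf',
                                        comment='fixed typos')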
""" try: latestVersion = self.get_latest_version() if latestVersion == 0: myversion = 1 else: myversion = latestVersion + 1 if os.path.exists(filename): if not os.path.getsize(filename) > 0: raise InvenioWebSubmitFileError, "%s seems to be empty" % filename if format is None: format = decompose_file(filename)[2] else: format = normalize_format(format) destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, myversion) if run_sql("SELECT id_bibdoc FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, myversion, format)): raise InvenioWebSubmitFileError("According to the database a file of format %s is already attached to the docid %s" % (format, self.id)) try: shutil.copyfile(filename, destination) os.chmod(destination, 0644) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) self.more_info.set_description(description, format, myversion) self.more_info.set_comment(comment, format, myversion) if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') for flag in flags: if flag == 'PERFORM_HIDE_PREVIOUS': for afile in self.list_all_files(): format = afile.get_format() version = afile.get_version() if version < myversion: self.more_info.set_flag('HIDDEN', format, myversion) else: self.more_info.set_flag(flag, format, myversion) else: raise InvenioWebSubmitFileError, "'%s' does not exists!" % filename finally: self.touch() Md5Folder(self.basedir).update() self._build_file_list() just_added_file = self.get_file(format, myversion) run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, true, %s, %s, %s, %s, %s)", (self.id, myversion, format, just_added_file.cd, just_added_file.md, just_added_file.get_checksum(), just_added_file.get_size(), just_added_file.mime)) run_sql("UPDATE bibdocfsinfo SET last_version=false WHERE id_bibdoc=%s AND version<%s", (self.id, myversion)) def add_file_new_format(self, filename, version=None, description=None, comment=None, format=None, flags=None): """ Add a file as a new format. @param filename: the local path of the file. @type filename: string @param version: an optional specific version to which the new format should be added. If None, the last version will be used. @type version: integer @param description: an optional description for the file. @type description: string @param comment: an optional comment to the file. @type comment: string @param format: the extension of the file. If not specified it will be retrieved from the filename (see L{decompose_file}). @type format: string @param flags: a set of flags to be associated with the file (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}) @type flags: list of string @raise InvenioWebSubmitFileError: if the given format already exists. 
""" try: if version is None: version = self.get_latest_version() if version == 0: version = 1 if os.path.exists(filename): if not os.path.getsize(filename) > 0: raise InvenioWebSubmitFileError, "%s seems to be empty" % filename if format is None: format = decompose_file(filename)[2] else: format = normalize_format(format) if run_sql("SELECT id_bibdoc FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, version, format)): raise InvenioWebSubmitFileError("According to the database a file of format %s is already attached to the docid %s" % (format, self.id)) destination = "%s/%s%s;%i" % (self.basedir, self.docname, format, version) if os.path.exists(destination): raise InvenioWebSubmitFileError, "A file for docname '%s' for the recid '%s' already exists for the format '%s'" % (self.docname, self.recid, format) try: shutil.copyfile(filename, destination) os.chmod(destination, 0644) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while copying '%s' to '%s': '%s'" % (filename, destination, e) self.more_info.set_comment(comment, format, version) self.more_info.set_description(description, format, version) if flags is None: flags = [] if 'pdfa' in get_subformat_from_format(format).split(';') and not 'PDF/A' in flags: flags.append('PDF/A') for flag in flags: if flag != 'PERFORM_HIDE_PREVIOUS': self.more_info.set_flag(flag, format, version) else: raise InvenioWebSubmitFileError, "'%s' does not exists!" % filename finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list() just_added_file = self.get_file(format, version) run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, true, %s, %s, %s, %s, %s)", (self.id, version, format, just_added_file.cd, just_added_file.md, just_added_file.get_checksum(), just_added_file.get_size(), just_added_file.mime)) def purge(self): """ Physically removes all the previous version of the given bibdoc. Everything but the last formats will be erased. """ version = self.get_latest_version() if version > 1: for afile in self.docfiles: if afile.get_version() < version: self.more_info.unset_comment(afile.get_format(), afile.get_version()) self.more_info.unset_description(afile.get_format(), afile.get_version()) for flag in CFG_BIBDOCFILE_AVAILABLE_FLAGS: self.more_info.unset_flag(flag, afile.get_format(), afile.get_version()) try: os.remove(afile.get_full_path()) except Exception, e: register_exception() Md5Folder(self.basedir).update() self.touch() self._build_file_list() run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version<%s", (self.id, version)) def expunge(self): """ Physically remove all the traces of a given document. @note: an expunged BibDoc object shouldn't be used anymore or the result might be unpredicted. 
""" del self.md5s del self.more_info os.system('rm -rf %s' % escape_shell_arg(self.basedir)) run_sql('DELETE FROM bibrec_bibdoc WHERE id_bibdoc=%s', (self.id, )) run_sql('DELETE FROM bibdoc_bibdoc WHERE id_bibdoc1=%s OR id_bibdoc2=%s', (self.id, self.id)) run_sql('DELETE FROM bibdoc WHERE id=%s', (self.id, )) run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, doctimestamp) VALUES("EXPUNGE", %s, %s, NOW())', (self.id, self.docname)) run_sql('DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s', (self.id, )) del self.docfiles del self.id del self.cd del self.md del self.td del self.basedir del self.recid del self.doctype del self.docname def revert(self, version): """ Revert the document to a given version. All the formats corresponding to that version are copied forward to a new version. @param version: the version to revert to. @type version: integer @raise InvenioWebSubmitFileError: in case of errors """ version = int(version) docfiles = self.list_version_files(version) if docfiles: self.add_file_new_version(docfiles[0].get_full_path(), description=docfiles[0].get_description(), comment=docfiles[0].get_comment(), format=docfiles[0].get_format(), flags=docfiles[0].flags) for docfile in docfiles[1:]: self.add_file_new_format(docfile.filename, description=docfile.get_description(), comment=docfile.get_comment(), format=docfile.get_format(), flags=docfile.flags) def import_descriptions_and_comments_from_marc(self, record=None): """ Import descriptions and comments from the corresponding MARC metadata. @param record: the record (if None it will be calculated). @type record: bibrecord recstruct @note: If record is passed it is directly used, otherwise it is retrieved from the MARCXML stored in the database. """ ## Let's get the record from invenio.search_engine import get_record if record is None: record = get_record(self.id) fields = record_get_field_instances(record, '856', '4', ' ') global_comment = None global_description = None local_comment = {} local_description = {} for field in fields: url = field_get_subfield_values(field, 'u') if url: ## Given a url url = url[0] if url == '%s/%s/%s/files/' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid): ## If it is a traditional /CFG_SITE_RECORD/1/files/ one ## We have global description/comment for all the formats description = field_get_subfield_values(field, 'y') if description: global_description = description[0] comment = field_get_subfield_values(field, 'z') if comment: global_comment = comment[0] elif bibdocfile_url_p(url): ## Otherwise we have description/comment per format dummy, docname, format = decompose_bibdocfile_url(url) if docname == self.docname: description = field_get_subfield_values(field, 'y') if description: local_description[format] = description[0] comment = field_get_subfield_values(field, 'z') if comment: local_comment[format] = comment[0] ## Let's update the tables version = self.get_latest_version() for docfile in self.list_latest_files(): format = docfile.get_format() if format in local_comment: self.set_comment(local_comment[format], format, version) else: self.set_comment(global_comment, format, version) if format in local_description: self.set_description(local_description[format], format, version) else: self.set_description(global_description, format, version) self._build_file_list('init') def get_icon(self, subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE, display_hidden=True): """ @param subformat_re: by default the convention is that L{CFG_WEBSUBMIT_ICON_SUBFORMAT_RE} is used as a subformat indicator to mean 
that a particular format is to be used as an icon. Specify a different subformat if you need to use a different convention. @type subformat_re: compiled regular expression @return: the bibdocfile corresponding to the icon of this document, or None if no icon exists for this document. @rtype: BibDocFile @warning: before I{subformat} was introduced this method was returning a BibDoc, while now it returns a BibDocFile. Check if your client code is compatible with this. """ for docfile in self.list_latest_files(list_hidden=display_hidden): if subformat_re.match(docfile.get_subformat()): return docfile return None def add_icon(self, filename, format=None, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT): """ Attaches an icon to this document. @param filename: the local filesystem path to the icon. @type filename: string @param format: an optional format for the icon. If not specified it will be guessed from the filesystem path. @type format: string @param subformat: by default the convention is that CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT is used as a subformat indicator to mean that a particular format is to be used as an icon. Specify a different subformat if you need to use a different convention. @type subformat: string @raise InvenioWebSubmitFileError: in case of errors. """ #first check if an icon already exists if not format: format = decompose_file(filename)[2] if subformat: format += ";%s" % subformat self.add_file_new_format(filename, format=format) def delete_icon(self, subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE): """ @param subformat_re: by default the convention is that L{CFG_WEBSUBMIT_ICON_SUBFORMAT_RE} is used as a subformat indicator to mean that a particular format is to be used as an icon. Specify a different subformat if you need to use a different convention. @type subformat_re: compiled regular expression Removes the icon attached to the document if it exists. """ for docfile in self.list_latest_files(): if subformat_re.match(docfile.get_subformat()): self.delete_file(docfile.get_format(), docfile.get_version()) def display(self, version="", ln=CFG_SITE_LANG, display_hidden=True): """ Returns an HTML representation of this document. @param version: if not set, only the last version will be displayed. If 'all', all versions will be displayed. @type version: string (integer or 'all') @param ln: the language code. @type ln: string @param display_hidden: whether to include hidden files as well. @type display_hidden: bool @return: the formatted representation.
@rtype: HTML string """ t = "" if version == "all": docfiles = self.list_all_files(list_hidden=display_hidden) elif version != "": version = int(version) docfiles = self.list_version_files(version, list_hidden=display_hidden) else: docfiles = self.list_latest_files(list_hidden=display_hidden) icon = self.get_icon(display_hidden=display_hidden) if icon: imageurl = icon.get_url() else: imageurl = "%s/img/smallfiles.gif" % CFG_SITE_URL versions = [] for version in list_versions_from_array(docfiles): currversion = { 'version' : version, 'previous' : 0, 'content' : [] } if version == self.get_latest_version() and version != 1: currversion['previous'] = 1 for docfile in docfiles: if docfile.get_version() == version: currversion['content'].append(docfile.display(ln = ln)) versions.append(currversion) if versions: return websubmit_templates.tmpl_bibdoc_filelist( ln = ln, versions = versions, imageurl = imageurl, docname = self.docname, recid = self.recid, status = self.status ) else: return "" def change_name(self, newname): """ Renames this document name. @param newname: the new name. @type newname: string @raise InvenioWebSubmitFileError: if the new name corresponds to a document already attached to the record owning this document. """ try: newname = normalize_docname(newname) res = run_sql("SELECT b.id FROM bibrec_bibdoc bb JOIN bibdoc b on bb.id_bibdoc=b.id WHERE bb.id_bibrec=%s AND b.docname=%s", (self.recid, newname)) if res: raise InvenioWebSubmitFileError, "A bibdoc called %s already exists for recid %s" % (newname, self.recid) try: for f in os.listdir(self.basedir): if not f.startswith('.'): try: (dummy, base, extension, version) = decompose_file_with_version(f) except ValueError: register_exception(alert_admin=True, prefix="Strange file '%s' is stored in %s" % (f, self.basedir)) else: shutil.move(os.path.join(self.basedir, f), os.path.join(self.basedir, '%s%s;%i' % (newname, extension, version))) except Exception, e: register_exception() raise InvenioWebSubmitFileError("Error in renaming the bibdoc %s to %s for recid %s: %s" % (self.docname, newname, self.recid, e)) run_sql("update bibdoc set docname=%s where id=%s", (newname, self.id,)) self.docname = newname finally: Md5Folder(self.basedir).update() self.touch() self._build_file_list('rename') def set_comment(self, comment, format, version=None): """ Updates the comment of a specific format/version of the document. @param comment: the new comment. @type comment: string @param format: the specific format for which the comment should be updated. @type format: string @param version: the specific version for which the comment should be updated. If not specified the last version will be used. @type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.set_comment(comment, format, version) self.touch() self._build_file_list('init') def set_description(self, description, format, version=None): """ Updates the description of a specific format/version of the document. @param description: the new description. @type description: string @param format: the specific format for which the description should be updated. @type format: string @param version: the specific version for which the description should be updated. If not specified the last version will be used. 
@type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.set_description(description, format, version) self.touch() self._build_file_list('init') def set_flag(self, flagname, format, version=None): """ Sets a flag for a specific format/version of the document. @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}. @type flagname: string @param format: the specific format for which the flag should be set. @type format: string @param version: the specific version for which the flag should be set. If not specified the last version will be used. @type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.set_flag(flagname, format, version) self.touch() self._build_file_list('init') def has_flag(self, flagname, format, version=None): """ Checks if a particular flag for a format/version is set. @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}. @type flagname: string @param format: the specific format for which the flag should be set. @type format: string @param version: the specific version for which the flag should be set. If not specified the last version will be used. @type version: integer @return: True if the flag is set. @rtype: bool """ if version is None: version = self.get_latest_version() format = normalize_format(format) return self.more_info.has_flag(flagname, format, version) def unset_flag(self, flagname, format, version=None): """ Unsets a flag for a specific format/version of the document. @param flagname: a flag from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}. @type flagname: string @param format: the specific format for which the flag should be unset. @type format: string @param version: the specific version for which the flag should be unset. If not specified the last version will be used. @type version: integer """ if version is None: version = self.get_latest_version() format = normalize_format(format) self.more_info.unset_flag(flagname, format, version) self.touch() self._build_file_list('init') def get_comment(self, format, version=None): """ Retrieve the comment of a specific format/version of the document. @param format: the specific format for which the comment should be retrieved. @type format: string @param version: the specific version for which the comment should be retrieved. If not specified the last version will be used. @type version: integer @return: the comment. @rtype: string """ if version is None: version = self.get_latest_version() format = normalize_format(format) return self.more_info.get_comment(format, version) def get_description(self, format, version=None): """ Retrieve the description of a specific format/version of the document. @param format: the specific format for which the description should be retrieved. @type format: string @param version: the specific version for which the description should be retrieved. If not specified the last version will be used. @type version: integer @return: the description. @rtype: string """ if version is None: version = self.get_latest_version() format = normalize_format(format) return self.more_info.get_description(format, version) def hidden_p(self, format, version=None): """ Returns True if the file specified by the given format/version is hidden. @param format: the specific format for which the description should be retrieved. @type format: string @param version: the specific version for which the description should be retrieved. 
If not specified the last version will be used. @type version: integer @return: True if hidden. @rtype: bool """ if version is None: version = self.get_latest_version() return self.more_info.has_flag('HIDDEN', format, version) def get_docname(self): """ @return: the name of this document. @rtype: string """ return self.docname def get_base_dir(self): """ @return: the base directory on the local filesystem for this document (e.g. C{/soft/cdsweb/var/data/files/g0/123}) @rtype: string """ return self.basedir def get_type(self): """ @return: the type of this document. @rtype: string""" return self.doctype def get_recid(self): """ @return: the record id of the record to which this document is attached. @rtype: integer """ return self.recid def get_id(self): """ @return: the id of this document. @rtype: integer """ return self.id def pdf_a_p(self): """ @return: True if this document contains a PDF in PDF/A format. @rtype: bool""" return self.has_flag('PDF/A', 'pdf') def has_text(self, require_up_to_date=False, version=None): """ Return True if the text of this document has already been extracted. @param require_up_to_date: if True check the text was actually extracted after the most recent format of the given version. @type require_up_to_date: bool @param version: a version for which the text should have been extracted. If not specified the latest version is considered. @type version: integer @return: True if the text has already been extracted. @rtype: bool """ if version is None: version = self.get_latest_version() if os.path.exists(os.path.join(self.basedir, '.text;%i' % version)): if not require_up_to_date: return True else: docfiles = self.list_version_files(version) text_md = datetime.fromtimestamp(os.path.getmtime(os.path.join(self.basedir, '.text;%i' % version))) for docfile in docfiles: if text_md <= docfile.md: return False return True return False def get_file(self, format, version=""): """ Returns a L{BibDocFile} instance of this document corresponding to the specific format and version. @param format: the specific format. @type format: string @param version: the specific version for which the description should be retrieved. If not specified the last version will be used. @type version: integer @return: the L{BibDocFile} instance. @rtype: BibDocFile """ if version == "": docfiles = self.list_latest_files() else: version = int(version) docfiles = self.list_version_files(version) format = normalize_format(format) for docfile in docfiles: if (docfile.get_format()==format or not format): return docfile ## Let's skip the subformat specification and consider just the ## superformat superformat = get_superformat_from_format(format) for docfile in docfiles: if get_superformat_from_format(docfile.get_format()) == superformat: return docfile raise InvenioWebSubmitFileError, "No file called '%s' of format '%s', version '%s'" % (self.docname, format, version) def list_versions(self): """ @return: the list of existing version numbers for this document. @rtype: list of integer """ versions = [] for docfile in self.docfiles: if not docfile.get_version() in versions: versions.append(docfile.get_version()) versions.sort() return versions def delete(self): """ Delete this document. @see: L{undelete} for how to undelete the document. @raise InvenioWebSubmitFileError: in case of errors. 
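        A hedged sketch of a delete/undelete round trip (the recid and
        docname are hypothetical)::

            bibdoc = BibRecDocs(123).get_bibdoc('thesis')
            bibdoc.delete()
            assert bibdoc.deleted_p()
            bibdoc.undelete(previous_status='')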
""" try: today = datetime.today() self.change_name('DELETED-%s%s-%s' % (today.strftime('%Y%m%d%H%M%S'), today.microsecond, self.docname)) run_sql("UPDATE bibdoc SET status='DELETED' WHERE id=%s", (self.id,)) self.status = 'DELETED' except Exception, e: register_exception() raise InvenioWebSubmitFileError, "It's impossible to delete bibdoc %s: %s" % (self.id, e) def deleted_p(self): """ @return: True if this document has been deleted. @rtype: bool """ return self.status == 'DELETED' def empty_p(self): """ @return: True if this document is empty, i.e. it has no bibdocfile connected. @rtype: bool """ return len(self.docfiles) == 0 def undelete(self, previous_status=''): """ Undelete a deleted file (only if it was actually deleted via L{delete}). The previous C{status}, i.e. the restriction key can be provided. Otherwise the undeleted document will be public. @param previous_status: the previous status the should be restored. @type previous_status: string @raise InvenioWebSubmitFileError: in case of any error. """ bibrecdocs = BibRecDocs(self.recid) try: run_sql("UPDATE bibdoc SET status=%s WHERE id=%s AND status='DELETED'", (previous_status, self.id)) except Exception, e: raise InvenioWebSubmitFileError, "It's impossible to undelete bibdoc %s: %s" % (self.id, e) if self.docname.startswith('DELETED-'): try: # Let's remove DELETED-20080214144322- in front of the docname original_name = '-'.join(self.docname.split('-')[2:]) original_name = bibrecdocs.propose_unique_docname(original_name) self.change_name(original_name) except Exception, e: raise InvenioWebSubmitFileError, "It's impossible to restore the previous docname %s. %s kept as docname because: %s" % (original_name, self.docname, e) else: raise InvenioWebSubmitFileError, "Strange just undeleted docname isn't called DELETED-somedate-docname but %s" % self.docname def delete_file(self, format, version): """ Delete a specific format/version of this document on the filesystem. @param format: the particular format to be deleted. @type format: string @param version: the particular version to be deleted. @type version: integer @note: this operation is not reversible!""" try: afile = self.get_file(format, version) except InvenioWebSubmitFileError: return try: os.remove(afile.get_full_path()) - run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, afile.get_version(), afile.get_format)) + run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=%s AND format=%s", (self.id, afile.get_version(), afile.get_format())) + last_version = run_sql("SELECT max(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id, ))[0][0] + if last_version: + ## Updating information about last version + run_sql("UPDATE bibdocfsinfo SET last_version=true WHERE id_bibdoc=%s AND version=%s", (self.id, last_version)) + run_sql("UPDATE bibdocfsinfo SET last_version=false WHERE id_bibdoc=%s AND version<>%s", (self.id, last_version)) except OSError: pass self.touch() self._build_file_list() def get_history(self): """ @return: a human readable and parsable string that represent the history of this document. 
@rtype: list of string """ ret = [] hst = run_sql("""SELECT action, docname, docformat, docversion, docsize, docchecksum, doctimestamp FROM hstDOCUMENT WHERE id_bibdoc=%s ORDER BY doctimestamp ASC""", (self.id, )) for row in hst: ret.append("%s %s '%s', format: '%s', version: %i, size: %s, checksum: '%s'" % (row[6].strftime('%Y-%m-%d %H:%M:%S'), row[0], row[1], row[2], row[3], nice_size(row[4]), row[5])) return ret def _build_file_list(self, context=''): """ Lists all files attached to the bibdoc. This function should be called every time the bibdoc is modified. As a side effect it logs everything that has happened to the bibdocfiles in the log facility, according to the context: "init": means that the function has been called for the first time by a constructor, hence no logging is performed; "": by default means to log every deleted file as deleted and every added file as added; "rename": means that every apparently deleted file is logged as RENAMEDFROM and every new file as RENAMEDTO. """ def log_action(action, docid, docname, format, version, size, checksum, timestamp=''): """Log an action into the hstDOCUMENT table.""" try: if timestamp: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, %s)', (action, docid, docname, format, version, size, checksum, timestamp)) else: run_sql('INSERT DELAYED INTO hstDOCUMENT(action, id_bibdoc, docname, docformat, docversion, docsize, docchecksum, doctimestamp) VALUES(%s, %s, %s, %s, %s, %s, %s, NOW())', (action, docid, docname, format, version, size, checksum)) except DatabaseError: register_exception() def make_removed_added_bibdocfiles(previous_file_list): """Internal function for building the log of changed files.""" # Let's rebuild the previous situation old_files = {} for bibdocfile in previous_file_list: old_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's rebuild the new situation new_files = {} for bibdocfile in self.docfiles: new_files[(bibdocfile.name, bibdocfile.format, bibdocfile.version)] = (bibdocfile.size, bibdocfile.checksum, bibdocfile.md) # Let's subtract from added_files all the files that are present in # the old list, and let's add to deleted_files the old files that # are not present in the new list.
added_files = dict(new_files) deleted_files = {} for key, value in old_files.iteritems(): if added_files.has_key(key): del added_files[key] else: deleted_files[key] = value return (added_files, deleted_files) if context not in ('init', 'init_from_disk'): previous_file_list = list(self.docfiles) res = run_sql("SELECT status,docname,creation_date," "modification_date,more_info FROM bibdoc WHERE id=%s", (self.id,)) self.cd = res[0][2] self.md = res[0][3] self.docname = res[0][1] self.status = res[0][0] self.more_info = BibDocMoreInfo(self.id, blob_to_string(res[0][4])) self.docfiles = [] if CFG_BIBDOCFILE_ENABLE_BIBDOCFSINFO_CACHE and context == 'init': ## In the normal init context we read from the DB res = run_sql("SELECT version, format, cd, md, checksum, filesize FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id, )) for version, format, cd, md, checksum, size in res: self.docfiles.append(BibDocFile( os.path.join(self.basedir, self.docname + format + ";%s" % version), self.doctype, version, self.docname, format, self.recid, self.id, self.status, checksum, self.more_info, human_readable=self.human_readable, cd=cd, md=md, size=size)) else: if os.path.exists(self.basedir): self.md5s = Md5Folder(self.basedir) files = os.listdir(self.basedir) files.sort() for afile in files: if not afile.startswith('.'): try: filepath = os.path.join(self.basedir, afile) dirname, basename, format, fileversion = decompose_file_with_version(filepath) checksum = self.md5s.get_checksum(afile) # we can append the file: self.docfiles.append(BibDocFile(filepath, self.doctype, fileversion, basename, format, self.recid, self.id, self.status, checksum, self.more_info, human_readable=self.human_readable)) except Exception, e: register_exception() if context in ('init', 'init_from_disk'): return else: added_files, deleted_files = make_removed_added_bibdocfiles(previous_file_list) deletedstr = "DELETED" addedstr = "ADDED" if context == 'rename': deletedstr = "RENAMEDFROM" addedstr = "RENAMEDTO" for (docname, format, version), (size, checksum, md) in added_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(addedstr, self.id, docname, format, version, size, checksum, md) for (docname, format, version), (size, checksum, md) in deleted_files.iteritems(): if context == 'rename': md = '' # No modification time log_action(deletedstr, self.id, docname, format, version, size, checksum, md) def _sync_to_db(self): """ Update the content of the C{bibdocfsinfo} table by taking what is available on the filesystem.
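        A hedged note: this is typically useful after files have been
        manipulated directly on disk, e.g. (docid hypothetical)::

            bibdoc = BibDoc(456)
            bibdoc._sync_to_db()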
""" self._build_file_list('init_from_disk') run_sql("DELETE FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.id,)) for afile in self.docfiles: run_sql("INSERT INTO bibdocfsinfo(id_bibdoc, version, format, last_version, cd, md, checksum, filesize, mime) VALUES(%s, %s, %s, false, %s, %s, %s, %s, %s)", (self.id, afile.get_version(), afile.get_format(), afile.cd, afile.md, afile.get_checksum(), afile.get_size(), afile.mime)) run_sql("UPDATE bibdocfsinfo SET last_version=true WHERE id_bibdoc=%s AND version=%s", (self.id, self.get_latest_version())) def get_total_size_latest_version(self): """Return the total size used on disk of all the files belonging to this bibdoc and corresponding to the latest version.""" ret = 0 for bibdocfile in self.list_latest_files(): ret += bibdocfile.get_size() return ret def get_total_size(self): """Return the total size used on disk of all the files belonging to this bibdoc.""" ret = 0 for bibdocfile in self.list_all_files(): ret += bibdocfile.get_size() return ret def list_all_files(self, list_hidden=True): """Returns all the docfiles linked with the given bibdoc.""" if list_hidden: return self.docfiles else: return [afile for afile in self.docfiles if not afile.hidden_p()] def list_latest_files(self, list_hidden=True): """Returns all the docfiles within the last version.""" return self.list_version_files(self.get_latest_version(), list_hidden=list_hidden) def list_version_files(self, version, list_hidden=True): """Return all the docfiles of a particular version.""" version = int(version) return [docfile for docfile in self.docfiles if docfile.get_version() == version and (list_hidden or not docfile.hidden_p())] def check_file_exists(self, path): """ Check if a file with the same content of the file pointed in C{path} is already attached to this record. @param path: the file to be checked against. @type path: string @return: True if a file with the requested content is already attached to the record. @rtype: bool """ # Let's consider all the latest files for afile in self.list_latest_files(): if afile.is_identical_to(path): return True return False def get_latest_version(self): """ Returns the latest existing version number for the given bibdoc. If no file is associated to this bibdoc, returns '0'. """ version = 0 for bibdocfile in self.docfiles: if bibdocfile.get_version() > version: version = bibdocfile.get_version() return version def get_file_number(self): """Return the total number of files.""" return len(self.docfiles) def register_download(self, ip_address, version, format, userid=0): """Register the information about a download of a particular file.""" format = normalize_format(format) if format[:1] == '.': format = format[1:] format = format.upper() return run_sql("INSERT DELAYED INTO rnkDOWNLOADS " "(id_bibrec,id_bibdoc,file_version,file_format," "id_user,client_host,download_time) VALUES " "(%s,%s,%s,%s,%s,INET_ATON(%s),NOW())", (self.recid, self.id, version, format, userid, ip_address,)) def generic_path2bidocfile(fullpath): """ Returns a BibDocFile objects that wraps the given fullpath. @note: the object will contain the minimum information that can be guessed from the fullpath (e.g. docname, format, subformat, version, md5, creation_date, modification_date). It won't contain for example a comment, a description, a doctype, a restriction. 
""" fullpath = os.path.abspath(fullpath) try: path, name, format, version = decompose_file_with_version(fullpath) except ValueError: ## There is no version version = 0 path, name, format = decompose_file(fullpath) md5folder = Md5Folder(path) checksum = md5folder.get_checksum(os.path.basename(fullpath)) return BibDocFile(fullpath=fullpath, doctype=None, version=version, name=name, format=format, recid=0, docid=0, status=None, checksum=checksum, more_info=None) class BibDocFile: """This class represents a physical file in the Invenio filesystem. It should never be instantiated directly""" def __init__(self, fullpath, doctype, version, name, format, recid, docid, status, checksum, more_info=None, human_readable=False, cd=None, md=None, size=None): self.fullpath = os.path.abspath(fullpath) self.doctype = doctype self.docid = docid self.recid = recid self.version = version self.status = status self.checksum = checksum self.human_readable = human_readable if more_info: self.description = more_info.get_description(format, version) self.comment = more_info.get_comment(format, version) self.flags = more_info.get_flags(format, version) else: self.description = None self.comment = None self.flags = [] self.format = normalize_format(format) self.superformat = get_superformat_from_format(self.format) self.subformat = get_subformat_from_format(self.format) self.fullname = name if format: self.fullname += self.superformat self.mime, self.encoding = _mimes.guess_type(self.fullname) if self.mime is None: self.mime = "application/octet-stream" self.more_info = more_info self.hidden = 'HIDDEN' in self.flags self.size = size or os.path.getsize(fullpath) self.md = md or datetime.fromtimestamp(os.path.getmtime(fullpath)) try: self.cd = cd or datetime.fromtimestamp(os.path.getctime(fullpath)) except OSError: self.cd = self.md self.name = name self.dir = os.path.dirname(fullpath) if self.subformat: self.url = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {'subformat' : self.subformat}) self.fullurl = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {'subformat' : self.subformat, 'version' : self.version}) else: self.url = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {}) self.fullurl = create_url('%s/%s/%s/files/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, self.name, self.superformat), {'version' : self.version}) self.etag = '"%i%s%i"' % (self.docid, self.format, self.version) self.magic = None def __repr__(self): return ('BibDocFile(%s, %s, %i, %s, %s, %i, %i, %s, %s, %s, %s)' % (repr(self.fullpath), repr(self.doctype), self.version, repr(self.name), repr(self.format), self.recid, self.docid, repr(self.status), repr(self.checksum), repr(self.more_info), repr(self.human_readable))) def __str__(self): out = '%s:%s:%s:%s:fullpath=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullpath) out += '%s:%s:%s:%s:fullname=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullname) out += '%s:%s:%s:%s:name=%s\n' % (self.recid, self.docid, self.version, self.format, self.name) out += '%s:%s:%s:%s:subformat=%s\n' % (self.recid, self.docid, self.version, self.format, get_subformat_from_format(self.format)) out += '%s:%s:%s:%s:status=%s\n' % (self.recid, self.docid, self.version, self.format, self.status) out += '%s:%s:%s:%s:checksum=%s\n' % (self.recid, self.docid, self.version, self.format, 
self.checksum) if self.human_readable: out += '%s:%s:%s:%s:size=%s\n' % (self.recid, self.docid, self.version, self.format, nice_size(self.size)) else: out += '%s:%s:%s:%s:size=%s\n' % (self.recid, self.docid, self.version, self.format, self.size) out += '%s:%s:%s:%s:creation time=%s\n' % (self.recid, self.docid, self.version, self.format, self.cd) out += '%s:%s:%s:%s:modification time=%s\n' % (self.recid, self.docid, self.version, self.format, self.md) out += '%s:%s:%s:%s:magic=%s\n' % (self.recid, self.docid, self.version, self.format, self.get_magic()) out += '%s:%s:%s:%s:mime=%s\n' % (self.recid, self.docid, self.version, self.format, self.mime) out += '%s:%s:%s:%s:encoding=%s\n' % (self.recid, self.docid, self.version, self.format, self.encoding) out += '%s:%s:%s:%s:url=%s\n' % (self.recid, self.docid, self.version, self.format, self.url) out += '%s:%s:%s:%s:fullurl=%s\n' % (self.recid, self.docid, self.version, self.format, self.fullurl) out += '%s:%s:%s:%s:description=%s\n' % (self.recid, self.docid, self.version, self.format, self.description) out += '%s:%s:%s:%s:comment=%s\n' % (self.recid, self.docid, self.version, self.format, self.comment) out += '%s:%s:%s:%s:hidden=%s\n' % (self.recid, self.docid, self.version, self.format, self.hidden) out += '%s:%s:%s:%s:flags=%s\n' % (self.recid, self.docid, self.version, self.format, self.flags) out += '%s:%s:%s:%s:etag=%s\n' % (self.recid, self.docid, self.version, self.format, self.etag) return out def display(self, ln = CFG_SITE_LANG): """Returns a formatted representation of this docfile.""" return websubmit_templates.tmpl_bibdocfile_filelist( ln = ln, recid = self.recid, version = self.version, md = self.md, name = self.name, superformat = self.superformat, subformat = self.subformat, nice_size = nice_size(self.size), description = self.description or '' ) def is_identical_to(self, path): """ @param path: the path of another file on disk. @return: True if L{path} contains bitwise the same content. """ if os.path.getsize(path) != self.size: return False if calculate_md5(path) != self.checksum: return False return filecmp.cmp(self.get_full_path(), path) def is_restricted(self, user_info): """Returns restriction state. (see acc_authorize_action return values)""" if self.status not in ('', 'DELETED'): return check_bibdoc_authorization(user_info, status=self.status) elif self.status == 'DELETED': return (1, 'File has been deleted') else: return (0, '') def is_icon(self, subformat_re=CFG_WEBSUBMIT_ICON_SUBFORMAT_RE): """ @param subformat_re: by default the convention is that L{CFG_WEBSUBMIT_ICON_SUBFORMAT_RE} is used as a subformat indicator to mean that a particular format is to be used as an icon. Specify a different subformat if you need to use a different convention. @type subformat_re: compiled regular expression @return: True if this file is an icon.
@rtype: bool """ return bool(subformat_re.match(self.subformat)) def hidden_p(self): return self.hidden def get_url(self): return self.url def get_type(self): return self.doctype def get_path(self): return self.fullpath def get_bibdocid(self): return self.docid def get_name(self): return self.name def get_full_name(self): return self.fullname def get_full_path(self): return self.fullpath def get_format(self): return self.format def get_subformat(self): return self.subformat def get_superformat(self): return self.superformat def get_size(self): return self.size def get_version(self): return self.version def get_checksum(self): return self.checksum def get_description(self): return self.description def get_comment(self): return self.comment def get_content(self): """Returns the binary content of the file.""" content_fd = open(self.fullpath, 'rb') content = content_fd.read() content_fd.close() return content def get_recid(self): """Returns the recid connected with the bibdoc of this file.""" return self.recid def get_status(self): """Returns the status of the file, i.e. either '', 'DELETED' or a restriction keyword.""" return self.status def get_magic(self): """Return all the possible guesses from the magic library about the content of the file.""" if self.magic is None and CFG_HAS_MAGIC: magic_cookies = _get_magic_cookies() magic_result = [] for key in magic_cookies.keys(): magic_result.append(magic_cookies[key].file(self.fullpath)) self.magic = tuple(magic_result) return self.magic def check(self): """Return True if the checksum corresponds to the file.""" return calculate_md5(self.fullpath) == self.checksum def stream(self, req, download=False): """Stream the file. Note that no restriction check is being done here, since restrictions have been checked previously inside websubmit_webinterface.py.""" if os.path.exists(self.fullpath): if random.random() < CFG_BIBDOCFILE_MD5_CHECK_PROBABILITY and calculate_md5(self.fullpath) != self.checksum: raise InvenioWebSubmitFileError, "File %s, version %i, for record %s is corrupted!" % (self.fullname, self.version, self.recid) stream_file(req, self.fullpath, "%s%s" % (self.name, self.superformat), self.mime, self.encoding, self.etag, self.checksum, self.fullurl, download=download) raise apache.SERVER_RETURN, apache.DONE else: req.status = apache.HTTP_NOT_FOUND raise InvenioWebSubmitFileError, "%s does not exist!" % self.fullpath _RE_STATUS_PARSER = re.compile(r'^(?P<type>email|group|egroup|role|firerole|status):\s*(?P<value>.*)$', re.S + re.I) def check_bibdoc_authorization(user_info, status): """ Check if the user is authorized to access a document protected with the given status.
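As an illustrative sketch (user_info is the dictionary returned by collect_user_info(); the status values are examples taken from the regression tests)::

        check_bibdoc_authorization(user_info, 'email: jekyll@cds.cern.ch')
        check_bibdoc_authorization(user_info, 'role: thesesviewer')
        check_bibdoc_authorization(user_info, 'restricted_picture')

Each call returns an (auth_code, auth_message) tuple, with auth_code equal to 0 when access is granted.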
L{status} is a string of the form:: auth_type: auth_value where C{auth_type} can have values in:: email, group, role, firerole, status and C{auth_value} has a value interpreted against C{auth_type}: - C{email}: the user can access the document if his/her email matches C{auth_value} - C{group}: the user can access the document if one of the groups (local or external) of which he/she is member matches C{auth_value} - C{role}: the user can access the document if he/she belongs to the WebAccess role specified in C{auth_value} - C{firerole}: the user can access the document if he/she is implicitly matched by the role described by the firewall like role definition in C{auth_value} - C{status}: the user can access the document if he/she is authorized for the action C{viewrestrdoc} with the C{status} parameter having value C{auth_value} @note: If no C{auth_type} is specified or if C{auth_type} is not one of the above, C{auth_value} will be set to the value contained in the parameter C{status}, and C{auth_type} will be considered to be C{status}. @param user_info: the user_info dictionary @type user_info: dict @param status: the status of the document. @type status: string @return: a tuple, of the form C{(auth_code, auth_message)} where auth_code is 0 if the authorization is granted and greater than 0 otherwise. @rtype: (int, string) @raise ValueError: in case of unexpected parsing error. """ def parse_status(status): g = _RE_STATUS_PARSER.match(status) if g: return (g.group('type').lower(), g.group('value')) else: return ('status', status) if acc_is_user_in_role(user_info, acc_get_role_id(SUPERADMINROLE)): return (0, CFG_WEBACCESS_WARNING_MSGS[0]) auth_type, auth_value = parse_status(status) if auth_type == 'status': return acc_authorize_action(user_info, 'viewrestrdoc', status=auth_value) elif auth_type == 'email': if not auth_value.lower().strip() == user_info['email'].lower().strip(): return (1, 'You must be logged in with the email %s in order to access this document' % repr(auth_value)) elif auth_type == 'group': if not auth_value in user_info['group']: return (1, 'You must be member of the group %s in order to access this document' % repr(auth_value)) elif auth_type == 'role': if not acc_is_user_in_role(user_info, acc_get_role_id(auth_value)): return (1, 'You must be member of the role %s in order to access this document' % repr(auth_value)) elif auth_type == 'firerole': if not acc_firerole_check_user(user_info, compile_role_definition(auth_value)): return (1, 'You must be authorized in order to access this document') else: raise ValueError, 'Unexpected authorization type %s for %s' % (repr(auth_type), repr(auth_value)) return (0, CFG_WEBACCESS_WARNING_MSGS[0]) _RE_BAD_MSIE = re.compile(r"MSIE\s+(\d+\.\d+)") def stream_file(req, fullpath, fullname=None, mime=None, encoding=None, etag=None, md5=None, location=None, download=False): """This is a generic function to stream a file to the user. If fullname, mime, encoding, and location are not provided they will be guessed based on req and fullpath. md5 should be passed as a hexadecimal string.
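A usage sketch (the path is an illustrative placeholder; req is the mod_python request object)::

        stream_file(req, '/opt/invenio/var/tmp/test.pdf', download=True)  # illustrative path

With download=True an attachment Content-Disposition header is emitted; HTTP Range requests are honoured when CFG_ENABLE_HTTP_RANGE_REQUESTS is enabled.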
""" def normal_streaming(size): req.set_content_length(size) req.send_http_header() if not req.header_only: req.sendfile(fullpath) return "" def single_range(size, the_range): req.set_content_length(the_range[1]) req.headers_out['Content-Range'] = 'bytes %d-%d/%d' % (the_range[0], the_range[0] + the_range[1] - 1, size) req.status = apache.HTTP_PARTIAL_CONTENT req.send_http_header() if not req.header_only: req.sendfile(fullpath, the_range[0], the_range[1]) return "" def multiple_ranges(size, ranges, mime): req.status = apache.HTTP_PARTIAL_CONTENT boundary = '%s%04d' % (time.strftime('THIS_STRING_SEPARATES_%Y%m%d%H%M%S'), random.randint(0, 9999)) req.content_type = 'multipart/byteranges; boundary=%s' % boundary content_length = 0 for arange in ranges: content_length += len('--%s\r\n' % boundary) content_length += len('Content-Type: %s\r\n' % mime) content_length += len('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size)) content_length += len('\r\n') content_length += arange[1] content_length += len('\r\n') content_length += len('--%s--\r\n' % boundary) req.set_content_length(content_length) req.send_http_header() if not req.header_only: for arange in ranges: req.write('--%s\r\n' % boundary, 0) req.write('Content-Type: %s\r\n' % mime, 0) req.write('Content-Range: bytes %d-%d/%d\r\n' % (arange[0], arange[0] + arange[1] - 1, size), 0) req.write('\r\n', 0) req.sendfile(fullpath, arange[0], arange[1]) req.write('\r\n', 0) req.write('--%s--\r\n' % boundary) req.flush() return "" def parse_date(date): """According to a date can come in three formats (in order of preference): Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format Moreover IE is adding some trailing information after a ';'. Wrong dates should be simpled ignored. This function return the time in seconds since the epoch GMT or None in case of errors.""" if not date: return None try: date = date.split(';')[0].strip() # Because of IE ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(time.strptime(date, '%a, %d %b %Y %X %Z')) except: try: ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(time.strptime(date, '%A, %d-%b-%y %H:%M:%S %Z')) except: try: ## Sun, 06 Nov 1994 08:49:37 GMT return time.mktime(date) except: return None def parse_ranges(ranges): """According to a (multiple) range request comes in the form: bytes=20-30,40-60,70-,-80 with the meaning: from byte to 20 to 30 inclusive (11 bytes) from byte to 40 to 60 inclusive (21 bytes) from byte 70 to (size - 1) inclusive (size - 70 bytes) from byte size - 80 to (size - 1) inclusive (80 bytes) This function will return the list of ranges in the form: [[first_byte, last_byte], ...] 
If first_byte or last_byte aren't specified they'll be set to None If the list is not well formatted it will return None """ try: if ranges.startswith('bytes') and '=' in ranges: ranges = ranges.split('=')[1].strip() else: return None ret = [] for arange in ranges.split(','): arange = arange.strip() if arange.startswith('-'): ret.append([None, int(arange[1:])]) elif arange.endswith('-'): ret.append([int(arange[:-1]), None]) else: ret.append(map(int, arange.split('-'))) return ret except: return None def parse_tags(tags): """Return a list of tags starting from a comma separated list.""" return [tag.strip() for tag in tags.split(',')] def fix_ranges(ranges, size): """Complementary to parse_ranges it will transform all the ranges into (first_byte, length), adjusting all the values based on the actual size provided. """ ret = [] for arange in ranges: if (arange[0] is None and arange[1] > 0) or arange[0] < size: if arange[0] is None: arange[0] = size - arange[1] elif arange[1] is None: arange[1] = size - arange[0] else: arange[1] = arange[1] - arange[0] + 1 arange[0] = max(0, arange[0]) arange[1] = min(size - arange[0], arange[1]) if arange[1] > 0: ret.append(arange) return ret def get_normalized_headers(headers): """Strip and lowercase all the keys of the headers dictionary, plus strip, lowercase and parse the values of known headers into their structured equivalents.""" ret = { 'if-match' : None, 'unless-modified-since' : None, 'if-modified-since' : None, 'range' : None, 'if-range' : None, 'if-none-match' : None, } for key, value in headers.iteritems(): key = key.strip().lower() value = value.strip() if key in ('unless-modified-since', 'if-modified-since'): value = parse_date(value) elif key == 'range': value = parse_ranges(value) elif key == 'if-range': value = parse_date(value) or parse_tags(value) elif key in ('if-match', 'if-none-match'): value = parse_tags(value) if value: ret[key] = value return ret headers = get_normalized_headers(req.headers_in) g = _RE_BAD_MSIE.search(headers.get('user-agent', "MSIE 6.0")) bad_msie = g and float(g.group(1)) < 9.0 if CFG_BIBDOCFILE_USE_XSENDFILE: ## If XSendFile is supported by the server, let's use it.
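## (Mechanism sketch: rather than streaming the bytes ourselves, we only emit
## the response headers plus an X-Sendfile header pointing at the file on
## disk, e.g. "X-Sendfile: /opt/invenio/var/data/files/g0/123/test.pdf;1",
## and the web server performs the actual delivery. The path is illustrative.)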
if os.path.exists(fullpath): if fullname is None: fullname = os.path.basename(fullpath) if bad_msie: ## IE is confused by quotes req.headers_out["Content-Disposition"] = 'attachment; filename=%s' % fullname.replace('"', '\\"') elif download: req.headers_out["Content-Disposition"] = 'attachment; filename="%s"' % fullname.replace('"', '\\"') else: ## IE is confused by inline req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"') req.headers_out["X-Sendfile"] = fullpath if mime is None: format = decompose_file(fullpath)[2] (mime, encoding) = _mimes.guess_type(fullpath) if mime is None: mime = "application/octet-stream" if not bad_msie: ## IE is confused by not supported mimetypes req.content_type = mime return "" else: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND if headers['if-match']: if etag is not None and etag not in headers['if-match']: raise apache.SERVER_RETURN, apache.HTTP_PRECONDITION_FAILED if os.path.exists(fullpath): mtime = os.path.getmtime(fullpath) if fullname is None: fullname = os.path.basename(fullpath) if mime is None: (mime, encoding) = _mimes.guess_type(fullpath) if mime is None: mime = "application/octet-stream" if location is None: location = req.uri if not bad_msie: ## IE is confused by not supported mimetypes req.content_type = mime req.encoding = encoding req.filename = fullname req.headers_out["Last-Modified"] = time.strftime('%a, %d %b %Y %X GMT', time.gmtime(mtime)) if CFG_ENABLE_HTTP_RANGE_REQUESTS: req.headers_out["Accept-Ranges"] = "bytes" else: req.headers_out["Accept-Ranges"] = "none" req.headers_out["Content-Location"] = location if etag is not None: req.headers_out["ETag"] = etag if md5 is not None: req.headers_out["Content-MD5"] = base64.encodestring(binascii.unhexlify(md5.upper()))[:-1] if bad_msie: ## IE is confused by quotes req.headers_out["Content-Disposition"] = 'attachment; filename=%s' % fullname.replace('"', '\\"') elif download: req.headers_out["Content-Disposition"] = 'attachment; filename="%s"' % fullname.replace('"', '\\"') else: ## IE is confused by inline req.headers_out["Content-Disposition"] = 'inline; filename="%s"' % fullname.replace('"', '\\"') size = os.path.getsize(fullpath) if not size: try: raise Exception, '%s exists but is empty' % fullpath except Exception: register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND if headers['if-modified-since'] and headers['if-modified-since'] >= mtime: raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED if headers['if-none-match']: if etag is not None and etag in headers['if-none-match']: raise apache.SERVER_RETURN, apache.HTTP_NOT_MODIFIED if headers['unless-modified-since'] and headers['unless-modified-since'] < mtime: return normal_streaming(size) if CFG_ENABLE_HTTP_RANGE_REQUESTS and headers['range']: try: if headers['if-range']: if etag is None or etag not in headers['if-range']: return normal_streaming(size) ranges = fix_ranges(headers['range'], size) except: return normal_streaming(size) if len(ranges) > 1: return multiple_ranges(size, ranges, mime) elif ranges: return single_range(size, ranges[0]) else: raise apache.SERVER_RETURN, apache.HTTP_RANGE_NOT_SATISFIABLE else: return normal_streaming(size) else: raise apache.SERVER_RETURN, apache.HTTP_NOT_FOUND def stream_restricted_icon(req): """Return the content of the "Restricted Icon" file.""" stream_file(req, '%s/img/restricted.gif' % CFG_WEBDIR) raise apache.SERVER_RETURN, apache.DONE def list_types_from_array(bibdocs): """Retrieves the list of types 
from the given bibdoc list.""" types = [] for bibdoc in bibdocs: if not bibdoc.get_type() in types: types.append(bibdoc.get_type()) types.sort() if 'Main' in types: ## Move 'Main' to the beginning types.remove('Main') types.insert(0, 'Main') return types def list_versions_from_array(docfiles): """Retrieve the list of existing versions from the given docfiles list.""" versions = [] for docfile in docfiles: if not docfile.get_version() in versions: versions.append(docfile.get_version()) versions.sort() versions.reverse() return versions def _make_base_dir(docid): """Given a docid it returns the complete path that should host its files.""" group = "g" + str(int(int(docid) / CFG_WEBSUBMIT_FILESYSTEM_BIBDOC_GROUP_LIMIT)) return os.path.join(CFG_WEBSUBMIT_FILEDIR, group, str(docid)) class Md5Folder: """Manage all the MD5 checksums of the files in a folder""" def __init__(self, folder): """Initialize the class from the md5 checksum of a given path""" self.folder = folder try: self.load() except InvenioWebSubmitFileError: self.md5s = {} self.update() def update(self, only_new = True): """Update the .md5 file with the current files. If only_new is specified, checksums are computed only for files that do not yet have one.""" if not only_new: self.md5s = {} if os.path.exists(self.folder): for filename in os.listdir(self.folder): if filename not in self.md5s and not filename.startswith('.'): self.md5s[filename] = calculate_md5(os.path.join(self.folder, filename)) self.store() def store(self): """Store the current md5 dictionary into .md5""" try: old_umask = os.umask(022) md5file = open(os.path.join(self.folder, ".md5"), "w") for key, value in self.md5s.items(): md5file.write('%s *%s\n' % (value, key)) md5file.close() os.umask(old_umask) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while storing .md5 for folder '%s': '%s'" % (self.folder, e) def load(self): """Load .md5 into the md5 dictionary""" self.md5s = {} try: md5file = open(os.path.join(self.folder, ".md5"), "r") for row in md5file: md5hash = row[:32] filename = row[34:].strip() self.md5s[filename] = md5hash md5file.close() except IOError: self.update() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while loading .md5 for folder '%s': '%s'" % (self.folder, e) def check(self, filename = ''): """Check that the specified file (or, if no filename is given, every file for which a hash exists) is coherent with the stored hash.""" if filename and filename in self.md5s.keys(): try: return self.md5s[filename] == calculate_md5(os.path.join(self.folder, filename)) except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) else: for filename, md5hash in self.md5s.items(): try: if calculate_md5(os.path.join(self.folder, filename)) != md5hash: return False except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while loading '%s': '%s'" % (os.path.join(self.folder, filename), e) return True def get_checksum(self, filename): """Return the checksum of a physical file.""" md5hash = self.md5s.get(filename, None) if md5hash is None: self.update() # Now it should not fail! md5hash = self.md5s[filename] return md5hash def calculate_md5_external(filename): """Calculate the md5 of a physical file through the md5sum command line tool.
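Roughly equivalent to running (the path is an illustrative placeholder)::

        md5sum -b /path/to/file

keeping the first 32 hexadecimal characters of the output.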
This is suitable for files larger than 256Kb.""" try: md5_result = os.popen(CFG_PATH_MD5SUM + ' -b %s' % escape_shell_arg(filename)) ret = md5_result.read()[:32] md5_result.close() if len(ret) != 32: # Error in running md5sum. Let's fall back to the internal # algorithm. return calculate_md5(filename, force_internal=True) else: return ret except Exception, e: raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) def calculate_md5(filename, force_internal=False): """Calculate the md5 of a physical file. This is suitable for files smaller than 256Kb.""" if not CFG_PATH_MD5SUM or force_internal or os.path.getsize(filename) < CFG_BIBDOCFILE_MD5_THRESHOLD: try: to_be_read = open(filename, "rb") computed_md5 = md5() while True: buf = to_be_read.read(CFG_BIBDOCFILE_MD5_BUFFER) if buf: computed_md5.update(buf) else: break to_be_read.close() return computed_md5.hexdigest() except Exception, e: register_exception() raise InvenioWebSubmitFileError, "Encountered an exception while calculating md5 for file '%s': '%s'" % (filename, e) else: return calculate_md5_external(filename) def bibdocfile_url_to_bibrecdocs(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns a BibRecDocs object for the corresponding recid.""" recid = decompose_bibdocfile_url(url)[0] return BibRecDocs(recid) def bibdocfile_url_to_bibdoc(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns a BibDoc object for the corresponding recid/docname.""" docname = decompose_bibdocfile_url(url)[1] return bibdocfile_url_to_bibrecdocs(url).get_bibdoc(docname) def bibdocfile_url_to_bibdocfile(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns a BibDocFile object for the corresponding recid/docname/format.""" dummy, dummy, format = decompose_bibdocfile_url(url) return bibdocfile_url_to_bibdoc(url).get_file(format) def bibdocfile_url_to_fullpath(url): """Given an URL in the form CFG_SITE_[SECURE_]URL/CFG_SITE_RECORD/xxx/files/... it returns the fullpath for the corresponding recid/docname/format.""" return bibdocfile_url_to_bibdocfile(url).get_full_path() def bibdocfile_url_p(url): """Return True when the url is a potentially valid url pointing to a fulltext owned by the system.""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): return True if not (url.startswith('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)) or url.startswith('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD))): return False splitted_url = url.split('/files/') return len(splitted_url) == 2 and splitted_url[0] != '' and splitted_url[1] != '' def get_docid_from_bibdocfile_fullpath(fullpath): """Given a bibdocfile fullpath (e.g. "CFG_WEBSUBMIT_FILEDIR/g0/123/bar.pdf;1") returns the docid (e.g. 123).""" if not fullpath.startswith(os.path.join(CFG_WEBSUBMIT_FILEDIR, 'g')): raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath dirname, base, extension, version = decompose_file_with_version(fullpath) try: return int(dirname.split('/')[-1]) except: raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath def decompose_bibdocfile_fullpath(fullpath): """Given a bibdocfile fullpath (e.g.
"CFG_WEBSUBMIT_FILEDIR/g0/123/bar.pdf;1") returns a quadruple (recid, docname, format, version).""" if not fullpath.startswith(os.path.join(CFG_WEBSUBMIT_FILEDIR, 'g')): raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath dirname, base, extension, version = decompose_file_with_version(fullpath) try: docid = int(dirname.split('/')[-1]) bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() return recid, docname, extension, version except: raise InvenioWebSubmitFileError, "Fullpath %s doesn't correspond to a valid bibdocfile fullpath" % fullpath def decompose_bibdocfile_url(url): """Given a bibdocfile_url return a triple (recid, docname, format).""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): return decompose_bibdocfile_very_old_url(url) if url.startswith('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)): recid_file = url[len('%s/%s/' % (CFG_SITE_URL, CFG_SITE_RECORD)):] elif url.startswith('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD)): recid_file = url[len('%s/%s/' % (CFG_SITE_SECURE_URL, CFG_SITE_RECORD)):] else: raise InvenioWebSubmitFileError, "Url %s doesn't correspond to a valid record inside the system." % url recid_file = recid_file.replace('/files/', '/') recid, docname, format = decompose_file(urllib.unquote(recid_file)) if not recid and docname.isdigit(): ## If the URL was something similar to CFG_SITE_URL/CFG_SITE_RECORD/123 return (int(docname), '', '') return (int(recid), docname, format) re_bibdocfile_old_url = re.compile(r'/%s/(\d*)/files/' % CFG_SITE_RECORD) def decompose_bibdocfile_old_url(url): """Given a bibdocfile old url (e.g. CFG_SITE_URL/CFG_SITE_RECORD/123/files) it returns the recid.""" g = re_bibdocfile_old_url.search(url) if g: return int(g.group(1)) raise InvenioWebSubmitFileError('%s is not a valid old bibdocfile url' % url) def decompose_bibdocfile_very_old_url(url): """Decompose an old /getfile.py? URL""" if url.startswith('%s/getfile.py' % CFG_SITE_URL) or url.startswith('%s/getfile.py' % CFG_SITE_SECURE_URL): params = urllib.splitquery(url)[1] if params: try: params = cgi.parse_qs(params) if 'docid' in params: docid = int(params['docid'][0]) bibdoc = BibDoc(docid) recid = bibdoc.get_recid() docname = bibdoc.get_docname() elif 'recid' in params: recid = int(params['recid'][0]) if 'name' in params: docname = params['name'][0] else: docname = '' else: raise InvenioWebSubmitFileError('%s has not enough params to correspond to a bibdocfile.' % url) format = normalize_format(params.get('format', [''])[0]) return (recid, docname, format) except Exception, e: raise InvenioWebSubmitFileError('Problem with %s: %s' % (url, e)) else: raise InvenioWebSubmitFileError('%s has no params to correspond to a bibdocfile.' % url) else: raise InvenioWebSubmitFileError('%s is not a valid very old bibdocfile url' % url) def get_docname_from_url(url): """Return a potential docname given a url""" path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] filename = os.path.split(path)[-1] return file_strip_ext(filename) def get_format_from_url(url): """Return a potential format given a url""" path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] filename = os.path.split(path)[-1] return filename[len(file_strip_ext(filename)):] def clean_url(url): """Given a local url e.g. 
a local path, it renders it as a realpath.""" if is_url_a_local_file(url): path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] return os.path.abspath(path) else: return url def is_url_a_local_file(url): """Return True if the given URL is pointing to a local file.""" protocol = urllib2.urlparse.urlsplit(url)[0] return protocol in ('', 'file') def check_valid_url(url): """ Check for validity of a url or a file. @param url: the URL to check @type url: string @raise StandardError: if the URL is not a valid URL. """ try: if is_url_a_local_file(url): path = urllib2.urlparse.urlsplit(urllib.unquote(url))[2] if os.path.abspath(path) != path: raise StandardError, "%s is not a normalized path (would be %s)." % (path, os.path.normpath(path)) for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_TMPSHAREDDIR, CFG_WEBSUBMIT_STORAGEDIR]: if path.startswith(allowed_path): dummy_fd = open(path) dummy_fd.close() return raise StandardError, "%s is not in one of the allowed paths." % path else: try: open_url(url) except InvenioBibdocfileUnauthorizedURL, e: raise StandardError, str(e) except Exception, e: raise StandardError, "%s is not a correct url: %s" % (url, e) def safe_mkstemp(suffix, prefix='bibdocfile_'): """Create a temporary filename that doesn't have any '.' inside, apart from the suffix.""" tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR) # Close the file and leave the responsibility to the client code to # correctly open/close it. os.close(tmpfd) if '.' not in suffix: # Just in case format is empty return tmppath while '.' in os.path.basename(tmppath)[:-len(suffix)]: os.remove(tmppath) tmpfd, tmppath = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=CFG_TMPDIR) os.close(tmpfd) return tmppath def download_local_file(filename, format=None): """ Copies a local file to Invenio's temporary directory. @param filename: the name of the file to copy @type filename: string @param format: the format of the file to copy (will be found if not specified) @type format: string @return: the path of the temporary file created @rtype: string @raise StandardError: if something went wrong """ # Make sure the format is OK. if format is None: format = guess_format_from_url(filename) else: format = normalize_format(format) tmppath = '' # Now try to copy. try: path = urllib2.urlparse.urlsplit(urllib.unquote(filename))[2] if os.path.abspath(path) != path: raise StandardError, "%s is not a normalized path (would be %s)." \ % (path, os.path.normpath(path)) for allowed_path in CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS + [CFG_TMPDIR, CFG_WEBSUBMIT_STORAGEDIR]: if path.startswith(allowed_path): tmppath = safe_mkstemp(format) shutil.copy(path, tmppath) if os.path.getsize(tmppath) == 0: os.remove(tmppath) raise StandardError, "%s seems to be empty" % filename break else: raise StandardError, "%s is not in one of the allowed paths." % path except Exception, e: raise StandardError, "Impossible to copy the local file '%s': %s" % \ (filename, str(e)) return tmppath def download_external_url(url, format=None): """ Download a url (if it corresponds to a remote file) and return a local url to it. @param url: the URL to download @type url: string @param format: the format of the file (will be found if not specified) @type format: string @return: the path to the downloaded local file @rtype: string @raise StandardError: if the download failed """ tmppath = None # Make sure the format is OK.
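# (Illustrative sketch of the resolution order: for a URL like
# http://example.org/paper.pdf the '.pdf' extension is recognized below, while
# for http://example.org/download?id=1 no known extension is found and the
# format is later derived from the HTTP response headers. URLs are made up.)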
if format is None: # First try to find a known extension in the URL format = decompose_file(url, skip_version=True, only_known_extensions=True)[2] if not format: # No correct format could be found. Will try to get it from the # HTTP message headers. format = '' else: format = normalize_format(format) from_file, to_file, tmppath = None, None, '' try: from_file = open_url(url) except InvenioBibdocfileUnauthorizedURL, e: raise StandardError, str(e) except urllib2.URLError, e: raise StandardError, 'URL could not be opened: %s' % str(e) if not format: # We could not determine the format from the URL, so let's try # to read it from the HTTP headers. format = get_format_from_http_response(from_file) try: tmppath = safe_mkstemp(format) to_file = open(tmppath, 'wb') while True: block = from_file.read(CFG_BIBDOCFILE_BLOCK_SIZE) if not block: break to_file.write(block) to_file.close() from_file.close() if os.path.getsize(tmppath) == 0: raise StandardError, "%s seems to be empty" % url except Exception, e: # Try to close and remove the temporary file. try: to_file.close() except Exception: pass try: os.remove(tmppath) except Exception: pass raise StandardError, "Error when downloading %s into %s: %s" % \ (url, tmppath, e) return tmppath def get_format_from_http_response(response): """ Tries to retrieve the format of the file from the message headers of the HTTP response. @param response: the HTTP response @type response: file-like object (as returned by urllib.urlopen) @return: the format of the remote resource @rtype: string """ def parse_content_type(text): return text.split(';')[0].strip() def parse_content_disposition(text): for item in text.split(';'): item = item.strip() if item.strip().startswith('filename='): return item[len('filename="'):-len('"')] info = response.info() format = '' content_disposition = info.getheader('Content-Disposition') if content_disposition: filename = parse_content_disposition(content_disposition) if filename: format = decompose_file(filename)[2] content_type = info.getheader('Content-Type') if content_type: content_type = parse_content_type(content_type) ext = _mimes.guess_extension(content_type) if ext: format = normalize_format(ext) return format def download_url(url, format=None): """ Download a url (if it corresponds to a remote file) and return a local url to it. """ tmppath = None try: if is_url_a_local_file(url): tmppath = download_local_file(url, format=format) else: tmppath = download_external_url(url, format=format) except StandardError: raise return tmppath class BibDocMoreInfo: """ This class wraps contextual information of the documents, such as the - comments - descriptions - flags. Such information is kept separately per every format/version instance of the corresponding document and is serialized in the database, ready to be retrieved (but not searched). @param docid: the document identifier. @type docid: integer @param more_info: a serialized version of an already existing more_info object. If not specified this information will be read from the database, and if nothing is found there an empty dictionary will be allocated. @raise ValueError: if docid is not a positive integer. @ivar docid: the document identifier as passed to the constructor. @type docid: integer @ivar more_info: the more_info dictionary that will hold all the additional document information. @type more_info: dict of dict of dict @note: in general this class is never instantiated in client code and never used outside bibdocfile module.
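@note: an illustrative usage sketch (the docid value 123 is made up)::

        more_info = BibDocMoreInfo(123)
        more_info.set_comment('scanned copy', '.pdf', 1)
        assert more_info.get_comment('.pdf', 1) == 'scanned copy'  # illustrative
        more_info.unset_comment('.pdf', 1)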
@note: this class will be extended in the future to hold all the new auxiliary information about a document. """ def __init__(self, docid, more_info=None): if not (type(docid) in (long, int) and docid > 0): raise ValueError("docid is not a positive integer, but %s." % docid) self.docid = docid if more_info is None: res = run_sql('SELECT more_info FROM bibdoc WHERE id=%s', (docid, )) if res and res[0][0]: self.more_info = cPickle.loads(blob_to_string(res[0][0])) else: self.more_info = {} else: self.more_info = cPickle.loads(more_info) if 'descriptions' not in self.more_info: self.more_info['descriptions'] = {} if 'comments' not in self.more_info: self.more_info['comments'] = {} if 'flags' not in self.more_info: self.more_info['flags'] = {} def __repr__(self): """ @return: the canonical string representation of the C{BibDocMoreInfo}. @rtype: string """ return 'BibDocMoreInfo(%i, %s)' % (self.docid, repr(cPickle.dumps(self.more_info))) def flush(self): """ Flush this object to the database. """ run_sql('UPDATE bibdoc SET more_info=%s WHERE id=%s', (cPickle.dumps(self.more_info), self.docid)) def set_flag(self, flagname, format, version): """ Sets a flag. @param flagname: the flag to set (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @type flagname: string @param format: the format for which the flag should be set. @type format: string @param version: the version for which the flag should be set. @type version: integer @raise ValueError: if the flag is not in L{CFG_BIBDOCFILE_AVAILABLE_FLAGS} """ if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS: if not flagname in self.more_info['flags']: self.more_info['flags'][flagname] = {} if not version in self.more_info['flags'][flagname]: self.more_info['flags'][flagname][version] = {} self.more_info['flags'][flagname][version][format] = True self.flush() else: raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS) def get_comment(self, format, version): """ Returns the specified comment. @param format: the format for which the comment should be retrieved. @type format: string @param version: the version for which the comment should be retrieved. @type version: integer @return: the specified comment. @rtype: string """ try: assert(type(version) is int) format = normalize_format(format) return self.more_info['comments'].get(version, {}).get(format) except: register_exception() raise def get_description(self, format, version): """ Returns the specified description. @param format: the format for which the description should be retrieved. @type format: string @param version: the version for which the description should be retrieved. @type version: integer @return: the specified description. @rtype: string """ try: assert(type(version) is int) format = normalize_format(format) return self.more_info['descriptions'].get(version, {}).get(format) except: register_exception() raise def has_flag(self, flagname, format, version): """ Return True if the corresponding flag has been set. @param flagname: the name of the flag (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @type flagname: string @param format: the format for which the flag should be checked. @type format: string @param version: the version for which the flag should be checked. @type version: integer @return: True if the flag is set for the given format/version.
@rtype: bool @raise ValueError: if the flagname is not in L{CFG_BIBDOCFILE_AVAILABLE_FLAGS} """ if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS: return self.more_info['flags'].get(flagname, {}).get(version, {}).get(format, False) else: raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS) def get_flags(self, format, version): """ Return the list of all the enabled flags. @param format: the format for which the list should be returned. @type format: string @param version: the version for which the list should be returned. @type version: integer @return: the list of enabled flags (from L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @rtype: list of string """ return [flag for flag in self.more_info['flags'] if format in self.more_info['flags'][flag].get(version, {})] def set_comment(self, comment, format, version): """ Set a comment. @param comment: the comment to be set. @type comment: string @param format: the format for which the comment should be set. @type format: string @param version: the version for which the comment should be set: @type version: integer """ try: assert(type(version) is int and version > 0) format = normalize_format(format) if comment == KEEP_OLD_VALUE: comment = self.get_comment(format, version) or self.get_comment(format, version - 1) if not comment: self.unset_comment(format, version) self.flush() return if not version in self.more_info['comments']: self.more_info['comments'][version] = {} self.more_info['comments'][version][format] = comment self.flush() except: register_exception() raise def set_description(self, description, format, version): """ Set a description. @param description: the description to be set. @type description: string @param format: the format for which the description should be set. @type format: string @param version: the version for which the description should be set: @type version: integer """ try: assert(type(version) is int and version > 0) format = normalize_format(format) if description == KEEP_OLD_VALUE: description = self.get_description(format, version) or self.get_description(format, version - 1) if not description: self.unset_description(format, version) self.flush() return if not version in self.more_info['descriptions']: self.more_info['descriptions'][version] = {} self.more_info['descriptions'][version][format] = description self.flush() except: register_exception() raise def unset_comment(self, format, version): """ Unset a comment. @param format: the format for which the comment should be unset. @type format: string @param version: the version for which the comment should be unset: @type version: integer """ try: assert(type(version) is int and version > 0) del self.more_info['comments'][version][format] self.flush() except KeyError: pass except: register_exception() raise def unset_description(self, format, version): """ Unset a description. @param format: the format for which the description should be unset. @type format: string @param version: the version for which the description should be unset: @type version: integer """ try: assert(type(version) is int and version > 0) del self.more_info['descriptions'][version][format] self.flush() except KeyError: pass except: register_exception() raise def unset_flag(self, flagname, format, version): """ Unset a flag. @param flagname: the flag to be unset (see L{CFG_BIBDOCFILE_AVAILABLE_FLAGS}). @type flagname: string @param format: the format for which the flag should be unset. 
@type format: string @param version: the version for which the flag should be unset. @type version: integer @raise ValueError: if the flag is not in L{CFG_BIBDOCFILE_AVAILABLE_FLAGS} """ if flagname in CFG_BIBDOCFILE_AVAILABLE_FLAGS: try: del self.more_info['flags'][flagname][version][format] self.flush() except KeyError: pass else: raise ValueError, "%s is not in %s" % (flagname, CFG_BIBDOCFILE_AVAILABLE_FLAGS) def serialize(self): """ @return: the serialized version of this object. @rtype: string """ return cPickle.dumps(self.more_info) def readfile(filename): """ Read a file. @param filename: the name of the file to be read. @type filename: string @return: the text contained in the file. @rtype: string @note: Returns empty string in case of any error. @note: this function is useful for quick implementation of websubmit functions. """ try: return open(filename).read() except Exception: return '' class HeadRequest(urllib2.Request): """ A request object to perform a HEAD request. """ def get_method(self): return 'HEAD' def open_url(url, headers=None, head_request=False): """ Opens a URL. If headers are passed as argument, no check is performed and the URL will be opened. Otherwise checks if the URL is present in CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS and uses the headers specified in the config variable. @param url: the URL to open @type url: string @param headers: the headers to use @type headers: dictionary @param head_request: if True, perform a HEAD request, otherwise a GET request @type head_request: boolean @return: a file-like object as returned by urllib2.urlopen. """ headers_to_use = None if headers is None: for regex, headers in _CFG_BIBUPLOAD_FFT_ALLOWED_EXTERNAL_URLS: if regex.match(url) is not None: headers_to_use = headers break if headers_to_use is None: # URL is not allowed. raise InvenioBibdocfileUnauthorizedURL, "%s is not an authorized " \ "external URL." % url else: headers_to_use = headers request_obj = head_request and HeadRequest or urllib2.Request request = request_obj(url) + request.add_header('User-Agent', make_user_agent_string('bibdocfile')) for key, value in headers_to_use.items(): request.add_header(key, value) return urllib2.urlopen(request) diff --git a/modules/websubmit/lib/bibdocfile_regression_tests.py b/modules/websubmit/lib/bibdocfile_regression_tests.py index 59378b259..1c08c3de4 100644 --- a/modules/websubmit/lib/bibdocfile_regression_tests.py +++ b/modules/websubmit/lib/bibdocfile_regression_tests.py @@ -1,276 +1,300 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibDocFile Regression Test Suite.""" __revision__ = "$Id$" import unittest from invenio.testutils import make_test_suite, run_test_suite from invenio.bibdocfile import BibRecDocs, check_bibdoc_authorization, bibdocfile_url_p +from invenio.dbquery import run_sql from invenio.access_control_config import CFG_WEBACCESS_WARNING_MSGS from invenio.config import \ CFG_SITE_URL, \ CFG_PREFIX, \ CFG_WEBSUBMIT_FILEDIR, \ CFG_SITE_RECORD +class BibDocFsInfoTest(unittest.TestCase): + """Regression tests about the table bibdocfsinfo""" + def setUp(self): + self.my_bibrecdoc = BibRecDocs(2) + self.unique_name = self.my_bibrecdoc.propose_unique_docname('file') + self.my_bibdoc = self.my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', docname=self.unique_name) + self.my_bibdoc_id = self.my_bibdoc.id + + def tearDown(self): + self.my_bibdoc.expunge() + + def test_hard_delete(self): + """bibdocfile - test correct update of bibdocfsinfo when hard-deleting""" + self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id, ))[0][0], 1) + self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'", (self.my_bibdoc_id, ))[0][0], True) + self.my_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.gif') + self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id, ))[0][0], 2) + self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'", (self.my_bibdoc_id, ))[0][0], True) + self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'", (self.my_bibdoc_id, ))[0][0], False) + self.my_bibdoc.delete_file('.gif', 2) + self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id, ))[0][0], 1) + self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'", (self.my_bibdoc_id, ))[0][0], True) class BibRecDocsTest(unittest.TestCase): """regression tests about BibRecDocs""" def test_BibRecDocs(self): """bibdocfile - BibRecDocs functions""" my_bibrecdoc = BibRecDocs(2) #add bibdoc my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg') my_bibrecdoc.add_bibdoc(doctype='Main', docname='file', never_fail=False) self.assertEqual(len(my_bibrecdoc.list_bibdocs()), 3) my_added_bibdoc = my_bibrecdoc.get_bibdoc('file') #add bibdocfile in empty bibdoc my_added_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.gif', \ description= 'added in empty bibdoc', comment=None, format=None, flags=['PERFORM_HIDE_PREVIOUS']) #propose unique docname self.assertEqual(my_bibrecdoc.propose_unique_docname('file'), 'file_2') #has docname self.assertEqual(my_bibrecdoc.has_docname_p('file'), True) #merge 2 bibdocs my_bibrecdoc.merge_bibdocs('img_test', 'file') self.assertEqual(len(my_bibrecdoc.get_bibdoc("img_test").list_all_files()), 2) #check file exists self.assertEqual(my_bibrecdoc.check_file_exists(CFG_PREFIX + '/lib/webtest/invenio/test.jpg'), True) #get bibdoc names self.assertEqual(my_bibrecdoc.get_bibdoc_names('Main')[0], '0104007_02') self.assertEqual(my_bibrecdoc.get_bibdoc_names('Main')[1],'img_test') #get total size self.assertEqual(my_bibrecdoc.get_total_size(), 1647591) #get total size latest version 
self.assertEqual(my_bibrecdoc.get_total_size_latest_version(), 1647591) #display value = my_bibrecdoc.display(docname='img_test', version='', doctype='', ln='en', verbose=0, display_hidden=True) self.assert_("Main" in value) #get xml 8564 value = my_bibrecdoc.get_xml_8564() self.assert_('/'+ CFG_SITE_RECORD +'/2/files/img_test.jpg' in value) #check duplicate docnames self.assertEqual(my_bibrecdoc.check_duplicate_docnames(), True) def tearDown(self): my_bibrecdoc = BibRecDocs(2) #delete my_bibrecdoc.delete_bibdoc('img_test') my_bibrecdoc.delete_bibdoc('file') class BibDocsTest(unittest.TestCase): """regression tests about BibDocs""" def test_BibDocs(self): """bibdocfile - BibDocs functions""" #add file my_bibrecdoc = BibRecDocs(2) my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg') my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test") value = my_bibrecdoc.list_bibdocs() self.assertEqual(len(value), 2) #get total size (bibdoc) self.assertEqual(my_new_bibdoc.get_total_size(), 91750) #get recid self.assertEqual(my_new_bibdoc.get_recid(), 2) #change name my_new_bibdoc.change_name('new_name') #get docname self.assertEqual(my_new_bibdoc.get_docname(), 'new_name') #get type self.assertEqual(my_new_bibdoc.get_type(), 'Main') #get id self.assert_(my_new_bibdoc.get_id() > 80) #set status my_new_bibdoc.set_status('new status') #get status self.assertEqual(my_new_bibdoc.get_status(), 'new status') #get base directory self.assert_(my_new_bibdoc.get_base_dir().startswith(CFG_WEBSUBMIT_FILEDIR)) #get file number self.assertEqual(my_new_bibdoc.get_file_number(), 1) #add file new version my_new_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', description= 'the new version', comment=None, format=None, flags=["PERFORM_HIDE_PREVIOUS"]) self.assertEqual(my_new_bibdoc.list_versions(), [1, 2]) #revert my_new_bibdoc.revert(1) self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3]) self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3), 'test add new file') #get total size latest version self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750) #get latest version self.assertEqual(my_new_bibdoc.get_latest_version(), 3) #list latest files self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1) self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3) #list version files self.assertEqual(len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1) #display value = my_new_bibdoc.display(version='', ln='en', display_hidden=True) self.assert_('>test add new file<' in value) #format already exist self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True) #get file self.assertEqual(my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1) #set description my_new_bibdoc.set_description('new description', '.jpg', version=1) #get description self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1), 'new description') #set comment my_new_bibdoc.set_comment('new comment', '.jpg', version=1) #get comment self.assertEqual(my_new_bibdoc.get_comment('.jpg', version=1), 'new comment') #get history assert len(my_new_bibdoc.get_history()) > 0 #delete file my_new_bibdoc.delete_file('.jpg', 2) #list all files self.assertEqual(len(my_new_bibdoc.list_all_files()), 2) #delete file my_new_bibdoc.delete_file('.jpg', 3) #add new format my_new_bibdoc.add_file_new_format(CFG_PREFIX + '/lib/webtest/invenio/test.gif', version=None, description=None, comment=None,
format=None) self.assertEqual(len(my_new_bibdoc.list_all_files()), 2) #delete file my_new_bibdoc.delete_file('.jpg', 1) #delete file my_new_bibdoc.delete_file('.gif', 1) #empty bibdoc self.assertEqual(my_new_bibdoc.empty_p(), True) #hidden? self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False) #hide my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1) #hidden? self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True) #add and get icon my_new_bibdoc.add_icon( CFG_PREFIX + '/lib/webtest/invenio/icon-test.gif') value = my_bibrecdoc.list_bibdocs()[1] self.assertEqual(value.get_icon(), my_new_bibdoc.get_icon()) #delete icon my_new_bibdoc.delete_icon() #get icon self.assertEqual(my_new_bibdoc.get_icon(), None) #delete my_new_bibdoc.delete() self.assertEqual(my_new_bibdoc.deleted_p(), True) #undelete my_new_bibdoc.undelete(previous_status='') #expunging my_new_bibdoc.expunge() my_bibrecdoc.build_bibdoc_list() self.failIf('new_name' in my_bibrecdoc.get_bibdoc_names()) self.failUnless(my_bibrecdoc.get_bibdoc_names()) def tearDown(self): my_bibrecdoc = BibRecDocs(2) #delete my_bibrecdoc.delete_bibdoc('img_test') my_bibrecdoc.delete_bibdoc('new_name') class BibDocFilesTest(unittest.TestCase): """regression tests about BibDocFiles""" def test_BibDocFiles(self): """bibdocfile - BibDocFile functions """ #add bibdoc my_bibrecdoc = BibRecDocs(2) my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg') my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test") my_new_bibdocfile = my_new_bibdoc.list_all_files()[0] #get url self.assertEqual(my_new_bibdocfile.get_url(), CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD) #get type self.assertEqual(my_new_bibdocfile.get_type(), 'Main') #get path self.assert_(my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR)) self.assert_(my_new_bibdocfile.get_path().endswith('/img_test.jpg;1')) #get bibdocid self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id()) #get name self.assertEqual(my_new_bibdocfile.get_name() , 'img_test') #get full name self.assertEqual(my_new_bibdocfile.get_full_name() , 'img_test.jpg') #get full path self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_WEBSUBMIT_FILEDIR)) self.assert_(my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1')) #get format self.assertEqual(my_new_bibdocfile.get_format(), '.jpg') #get version self.assertEqual(my_new_bibdocfile.get_version(), 1) #get description self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description('.jpg', version=1)) #get comment self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment('.jpg', version=1)) #get recid self.assertEqual(my_new_bibdocfile.get_recid(), 2) #get status self.assertEqual(my_new_bibdocfile.get_status(), '') #get size self.assertEqual(my_new_bibdocfile.get_size(), 91750) #get checksum self.assertEqual(my_new_bibdocfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76') #check self.assertEqual(my_new_bibdocfile.check(), True) #display value = my_new_bibdocfile.display(ln='en') assert 'files/img_test.jpg?version=1">' in value #hidden? 
self.assertEqual(my_new_bibdocfile.hidden_p(), False) #delete my_new_bibdoc.delete() self.assertEqual(my_new_bibdoc.deleted_p(), True) class CheckBibDocAuthorizationTest(unittest.TestCase): """Regression tests for check_bibdoc_authorization function.""" def test_check_bibdoc_authorization(self): """bibdocfile - check_bibdoc_authorization function""" from invenio.webuser import collect_user_info, get_uid_from_email jekyll = collect_user_info(get_uid_from_email('jekyll@cds.cern.ch')) self.assertEqual(check_bibdoc_authorization(jekyll, 'role:thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'role: thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'role: thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'Role: thesesviewer'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'email: jekyll@cds.cern.ch'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(jekyll, 'email: jekyll@cds.cern.ch'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) juliet = collect_user_info(get_uid_from_email('juliet.capulet@cds.cern.ch')) self.assertEqual(check_bibdoc_authorization(juliet, 'restricted_picture'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertEqual(check_bibdoc_authorization(juliet, 'status: restricted_picture'), (0, CFG_WEBACCESS_WARNING_MSGS[0])) self.assertNotEqual(check_bibdoc_authorization(juliet, 'restricted_video')[0], 0) self.assertNotEqual(check_bibdoc_authorization(juliet, 'status: restricted_video')[0], 0) class BibDocFileURLTest(unittest.TestCase): """Regression tests for bibdocfile_url_p function.""" def test_bibdocfile_url_p(self): self.failUnless(bibdocfile_url_p(CFG_SITE_URL + '/%s/98/files/9709037.pdf' % CFG_SITE_RECORD)) self.failUnless(bibdocfile_url_p(CFG_SITE_URL + '/%s/098/files/9709037.pdf' % CFG_SITE_RECORD)) TEST_SUITE = make_test_suite(BibRecDocsTest, BibDocsTest, BibDocFilesTest, CheckBibDocAuthorizationTest, - BibDocFileURLTest) + BibDocFileURLTest, + BibDocFsInfoTest) if __name__ == "__main__": run_test_suite(TEST_SUITE, warn_user=True) diff --git a/modules/websubmit/lib/websubmit_config.py b/modules/websubmit/lib/websubmit_config.py index 688629ea5..b888f33fd 100644 --- a/modules/websubmit/lib/websubmit_config.py +++ b/modules/websubmit/lib/websubmit_config.py @@ -1,225 +1,264 @@ ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Invenio Submission Web Interface config file.""" __revision__ = "$Id$" import re ## test: test = "FALSE" ## CC all action confirmation mails to administrator? 
(0 == NO; 1 == YES) CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN = 0 ## During submission, warn user if she is going to leave the ## submission by following some link on the page? ## Does not work with Opera and Konqueror. ## This requires all submission functions to set Javascript variable ## 'user_must_confirm_before_leaving_page' to 'false' before ## programmatically submitting a form, or else users will be asked ## confirmation after each submission step. ## (0 == NO; 1 == YES) CFG_WEBSUBMIT_CHECK_USER_LEAVES_SUBMISSION = 0 ## List of keywords/format parameters that should not, by default, write ## corresponding files in the submission directory (`curdir'). Some other ## filenames not included here are reserved too, such as those ## containing non-alphanumeric chars (excepted underscores '_'), ## e.g. all names containing a dot ('bibdocactions.log', ## 'performed_actions.log', etc.) CFG_RESERVED_SUBMISSION_FILENAMES = ['SuE', 'files', 'lastuploadedfile', 'curdir', 'function_log', 'SN', 'ln'] ## CFG_WEBSUBMIT_ICON_SUBFORMAT_RE -- a subformat is an Invenio concept to give ## file formats more semantics. For example "foo.gif;icon" has ".gif;icon" ## 'format', ".gif" 'superformat' and "icon" 'subformat'. That means that this ## particular format/instance of the "foo" document is not only a ".gif" but ## is in the shape of an "icon", i.e. most probably it will be low-resolution. ## This configuration variable lets the administrator decide which implicit ## convention will be used to know which formats are meant to be used ## as icons. CFG_WEBSUBMIT_ICON_SUBFORMAT_RE = re.compile(r"icon.*") ## CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT -- this is the default subformat used ## when creating new icons. CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT = "icon" ## Prefix for video uploads, Garbage Collector CFG_WEBSUBMIT_TMP_VIDEO_PREFIX = "video_upload_" +try: + from invenio.config import CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC +except ImportError: + CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC = { + 'can_revise_doctypes': ['*'], + 'can_comment_doctypes': ['*'], + 'can_describe_doctypes': ['*'], + 'can_delete_doctypes': ['*'], + 'can_keep_doctypes': ['*'], + 'can_rename_doctypes': ['*'], + 'can_add_format_to_doctypes': ['*'], + 'can_restrict_doctypes': ['*']} + +try: + from invenio.config import CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES +except ImportError: + CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES = [ + ('Main', 'Main document'), + ('LaTeX', 'LaTeX'), + ('Source', 'Source'), + ('Additional', 'Additional File'), + ('Audio', 'Audio file'), + ('Video', 'Video file'), + ('Script', 'Script'), + ('Data', 'Data'), + ('Figure', 'Figure'), + ('Schema', 'Schema'), + ('Graph', 'Graph'), + ('Image', 'Image'), + ('Drawing', 'Drawing'), + ('Slides', 'Slides')] + +try: + from invenio.config import CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS +except ImportError: + CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS = [ + ('', 'Public'), + ('restricted', 'Restricted')] + class InvenioWebSubmitFunctionError(Exception): """This exception should only ever be raised by WebSubmit functions. It will be caught and handled by the WebSubmit core itself. It is used to signal to WebSubmit core that one of the functions encountered a FATAL ERROR situation that should halt all further execution of the submission. The exception will carry an error message in its "value" string. This message will probably be displayed on the user's browser in an Invenio "error" box, and may be logged for the admin to examine.
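For instance, a WebSubmit function might abort the submission with (the message text is illustrative)::

        raise InvenioWebSubmitFunctionError('Cannot read the uploaded file.')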
Again: If this exception is raised by a WebSubmit function, an error message will be displayed and the submission ends in failure. Extends: Exception. """ def __init__(self, value): """Set the internal "value" attribute to that of the passed "value" parameter. @param value: (string) - an error string to display to the user. """ Exception.__init__(self) self.value = value def __str__(self): """Return oneself as a string (actually, return the contents of self.value). @return: (string) """ return str(self.value) class InvenioWebSubmitFunctionStop(Exception): """This exception should only ever be raised by WebSubmit functions. It will be caught and handled by the WebSubmit core itself. It is used to signal to the WebSubmit core that one of the functions encountered a situation that should prevent the functions that follow it from being executed, and that the WebSubmit core should display some sort of message to the user. This message will be stored in the "value" attribute of the object. *** NOTE: In the current WebSubmit, this "value" is usually a JavaScript string that redirects the user's browser back to the Web form phase of the submission. The use of JavaScript, however, is going to be removed in the future, so the mechanism may change. *** Extends: Exception. """ def __init__(self, value): """Set the internal "value" attribute to that of the passed "value" parameter. @param value: (string) - a string to display to the user. """ Exception.__init__(self) self.value = value def __str__(self): """Return oneself as a string (actually, return the contents of self.value). @return: (string) """ return str(self.value) class InvenioWebSubmitFunctionWarning(Exception): """This exception should be raised by a WebSubmit function when unexpected behaviour is encountered during the execution of the function. The unexpected behaviour should not have been so serious that execution had to be halted, but since the function was unable to perform its task, the event must be logged. Logging of the exception will be performed by WebSubmit. Extends: Exception. """ def __init__(self, value): """Set the internal "value" attribute to that of the passed "value" parameter. @param value: (string) - a string to write to the log. """ Exception.__init__(self) self.value = value def __str__(self): """Return oneself as a string (actually, return the contents of self.value). @return: (string) """ return str(self.value) class InvenioWebSubmitFileStamperError(Exception): """This exception should be raised by websubmit_file_stamper when an error is encountered that prevents a file from being stamped. When caught, this exception should be used to stop processing with a failure signal. Extends: Exception. """ def __init__(self, value): """Set the internal "value" attribute to that of the passed "value" parameter. @param value: (string) - a string to write to the log. """ Exception.__init__(self) self.value = value def __str__(self): """Return oneself as a string (actually, return the contents of self.value). @return: (string) """ return str(self.value) class InvenioWebSubmitIconCreatorError(Exception): """This exception should be raised by websubmit_icon_creator when an error is encountered that prevents an icon from being created. When caught, this exception should be used to stop processing with a failure signal. Extends: Exception. """ def __init__(self, value): """Set the internal "value" attribute to that of the passed "value" parameter. @param value: (string) - a string to write to the log.
""" Exception.__init__(self) self.value = value def __str__(self): """Return oneself as a string (actually, return the contents of self.value). @return: (string) """ return str(self.value) class InvenioWebSubmitFileMetadataRuntimeError(Exception): """This exception should be raised by websubmit_file_metadata plugins when an error is encoutered that prevents a extracting/updating a file. When caught, this exception should be used to stop processing with a failure signal. Extends: Exception. """ def __init__(self, value): """Set the internal "value" attribute to that of the passed "value" parameter. @param value: (string) - a string to write to the log. """ Exception.__init__(self) self.value = value def __str__(self): """Return oneself as a string (actually, return the contents of self.value). @return: (string) """ return str(self.value) diff --git a/modules/websubmit/lib/websubmit_webinterface.py b/modules/websubmit/lib/websubmit_webinterface.py index 3412ca3b9..160fc96c9 100644 --- a/modules/websubmit/lib/websubmit_webinterface.py +++ b/modules/websubmit/lib/websubmit_webinterface.py @@ -1,1478 +1,1454 @@ ## This file is part of Invenio. ## Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
__lastupdated__ = """$Date$""" __revision__ = "$Id$" import os import errno import time import cgi import sys import shutil from urllib import urlencode from invenio.config import \ CFG_ACCESS_CONTROL_LEVEL_SITE, \ CFG_SITE_LANG, \ CFG_SITE_NAME, \ CFG_TMPSHAREDDIR, \ CFG_SITE_NAME_INTL, \ CFG_SITE_URL, \ CFG_SITE_SECURE_URL, \ CFG_WEBSUBMIT_STORAGEDIR, \ CFG_PREFIX, \ CFG_CERN_SITE, \ CFG_SITE_RECORD from invenio import webinterface_handler_config as apache from invenio.dbquery import run_sql from invenio.access_control_config import VIEWRESTRCOLL from invenio.access_control_mailcookie import mail_cookie_create_authorize_action from invenio.access_control_engine import acc_authorize_action from invenio.access_control_admin import acc_is_role from invenio.webpage import page, create_error_box, pageheaderonly, \ pagefooteronly from invenio.webuser import getUid, page_not_authorized, collect_user_info, isUserSuperAdmin, \ isGuestUser from invenio.websubmit_config import * from invenio import webjournal_utils from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory from invenio.urlutils import make_canonical_urlargd, redirect_to_url from invenio.messages import gettext_set_language from invenio.search_engine import \ guess_primary_collection_of_a_record, get_colID, record_exists, \ create_navtrail_links, check_user_can_view_record, record_empty, \ is_user_owner_of_record from invenio.bibdocfile import BibRecDocs, normalize_format, file_strip_ext, \ stream_restricted_icon, BibDoc, InvenioWebSubmitFileError, stream_file, \ decompose_file, propose_next_docname, get_subformat_from_format from invenio.errorlib import register_exception from invenio.htmlutils import is_html_text_editor_installed from invenio.websubmit_icon_creator import create_icon, InvenioWebSubmitIconCreatorError from ckeditor_invenio_connector import process_CKEditor_upload, send_response import invenio.template websubmit_templates = invenio.template.load('websubmit') from invenio.websearchadminlib import get_detailed_page_tabs from invenio.session import get_session from invenio.jsonutils import json, CFG_JSON_AVAILABLE import invenio.template webstyle_templates = invenio.template.load('webstyle') websearch_templates = invenio.template.load('websearch') from invenio.websubmit_managedocfiles import \ create_file_upload_interface, \ get_upload_file_interface_javascript, \ get_upload_file_interface_css, \ move_uploaded_files_to_storage class WebInterfaceFilesPages(WebInterfaceDirectory): def __init__(self,recid): self.recid = recid def _lookup(self, component, path): # after ///files/ every part is used as the file # name filename = component def getfile(req, form): args = wash_urlargd(form, websubmit_templates.files_default_urlargd) ln = args['ln'] _ = gettext_set_language(ln) uid = getUid(req) user_info = collect_user_info(req) verbose = args['verbose'] if verbose >= 1 and not isUserSuperAdmin(user_info): # Only SuperUser can see all the details! verbose = 0 if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE > 1: return page_not_authorized(req, "/%s/%s" % (CFG_SITE_RECORD, self.recid), navmenuid='submit') if record_exists(self.recid) < 1: msg = "
<p>%s</p>
    " % _("Requested record does not seem to exist.") return warningMsg(msg, req, CFG_SITE_NAME, ln) if record_empty(self.recid): msg = "
<p>%s</p>
    " % _("Requested record does not seem to have been integrated.") return warningMsg(msg, req, CFG_SITE_NAME, ln) (auth_code, auth_message) = check_user_can_view_record(user_info, self.recid) if auth_code and user_info['email'] == 'guest': if webjournal_utils.is_recid_in_released_issue(self.recid): # We can serve the file pass else: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : guess_primary_collection_of_a_record(self.recid)}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri']}, {}) return redirect_to_url(req, target, norobot=True) elif auth_code: if webjournal_utils.is_recid_in_released_issue(self.recid): # We can serve the file pass else: return page_not_authorized(req, "../", \ text = auth_message) readonly = CFG_ACCESS_CONTROL_LEVEL_SITE == 1 # From now on: either the user provided a specific file # name (and a possible version), or we return a list of # all the available files. In no case are the docids # visible. try: bibarchive = BibRecDocs(self.recid) except InvenioWebSubmitFileError, e: register_exception(req=req, alert_admin=True) msg = "
<p>%s</p> <p>%s</p>
    " % ( _("The system has encountered an error in retrieving the list of files for this document."), _("The error has been logged and will be taken in consideration as soon as possible.")) return warningMsg(msg, req, CFG_SITE_NAME, ln) if bibarchive.deleted_p(): return print_warning(req, _("Requested record does not seem to exist.")) docname = '' format = '' version = '' warn = '' if filename: # We know the complete file name, guess which docid it # refers to ## TODO: Change the extension system according to ext.py from setlink ## and have a uniform extension mechanism... docname = file_strip_ext(filename) format = filename[len(docname):] if format and format[0] != '.': format = '.' + format if args['subformat']: format += ';%s' % args['subformat'] else: docname = args['docname'] if not format: format = args['format'] if args['subformat']: format += ';%s' % args['subformat'] if not version: version = args['version'] ## Download as attachment is_download = False if args['download']: is_download = True # version could be either empty, or all or an integer try: int(version) except ValueError: if version != 'all': version = '' display_hidden = isUserSuperAdmin(user_info) if version != 'all': # search this filename in the complete list of files for doc in bibarchive.list_bibdocs(): if docname == doc.get_docname(): try: docfile = doc.get_file(format, version) (auth_code, auth_message) = docfile.is_restricted(user_info) if auth_code != 0 and not is_user_owner_of_record(user_info, self.recid): if CFG_WEBSUBMIT_ICON_SUBFORMAT_RE.match(get_subformat_from_format(format)): return stream_restricted_icon(req) if user_info['email'] == 'guest': cookie = mail_cookie_create_authorize_action('viewrestrdoc', {'status' : docfile.get_status()}) target = '/youraccount/login' + \ make_canonical_urlargd({'action': cookie, 'ln' : ln, 'referer' : \ CFG_SITE_SECURE_URL + user_info['uri']}, {}) redirect_to_url(req, target) else: req.status = apache.HTTP_UNAUTHORIZED warn += print_warning(_("This file is restricted: ") + auth_message) break if not docfile.hidden_p(): if not readonly: ip = str(req.remote_ip) res = doc.register_download(ip, version, format, uid) try: return docfile.stream(req, download=is_download) except InvenioWebSubmitFileError, msg: register_exception(req=req, alert_admin=True) req.status = apache.HTTP_INTERNAL_SERVER_ERROR return warningMsg(_("An error has happened in trying to stream the request file."), req, CFG_SITE_NAME, ln) else: req.status = apache.HTTP_UNAUTHORIZED warn = print_warning(_("The requested file is hidden and can not be accessed.")) except InvenioWebSubmitFileError, msg: register_exception(req=req, alert_admin=True) if docname and format and not warn: req.status = apache.HTTP_NOT_FOUND warn += print_warning(_("Requested file does not seem to exist.")) filelist = bibarchive.display("", version, ln=ln, verbose=verbose, display_hidden=display_hidden) t = warn + websubmit_templates.tmpl_filelist( ln=ln, recid=self.recid, docname=args['docname'], version=version, filelist=filelist) cc = guess_primary_collection_of_a_record(self.recid) unordered_tabs = get_detailed_page_tabs(get_colID(cc), self.recid, ln) ordered_tabs_id = [(tab_id, values['order']) for (tab_id, values) in unordered_tabs.iteritems()] ordered_tabs_id.sort(lambda x,y: cmp(x[1],y[1])) link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % ln tabs = [(unordered_tabs[tab_id]['label'], \ '%s/%s/%s/%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, tab_id, link_ln), \ tab_id == 'files', unordered_tabs[tab_id]['enabled']) 
\ for (tab_id, order) in ordered_tabs_id if unordered_tabs[tab_id]['visible'] == True] top = webstyle_templates.detailed_record_container_top(self.recid, tabs, args['ln']) bottom = webstyle_templates.detailed_record_container_bottom(self.recid, tabs, args['ln']) title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, self.recid, args['ln']) return pageheaderonly(title=title, navtrail=create_navtrail_links(cc=cc, aas=0, ln=ln) + \ ''' > %s > %s''' % \ (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, title, _("Access to Fulltext")), description="", keywords="keywords", uid=uid, language=ln, req=req, navmenuid='search', navtrail_append_title_p=0) + \ websearch_templates.tmpl_search_pagestart(ln) + \ top + t + bottom + \ websearch_templates.tmpl_search_pageend(ln) + \ pagefooteronly(lastupdated=__lastupdated__, language=ln, req=req) return getfile, [] def __call__(self, req, form): """Called in case of URLs like /CFG_SITE_RECORD/123/files without trailing slash. """ args = wash_urlargd(form, websubmit_templates.files_default_urlargd) ln = args['ln'] link_ln = '' if ln != CFG_SITE_LANG: link_ln = '?ln=%s' % ln return redirect_to_url(req, '%s/%s/%s/files/%s' % (CFG_SITE_URL, CFG_SITE_RECORD, self.recid, link_ln)) def websubmit_legacy_getfile(req, form): """ Handle legacy /getfile.py URLs """ args = wash_urlargd(form, { 'recid': (int, 0), 'docid': (int, 0), 'version': (str, ''), 'name': (str, ''), 'format': (str, ''), 'ln' : (str, CFG_SITE_LANG) }) _ = gettext_set_language(args['ln']) def _getfile_py(req, recid=0, docid=0, version="", name="", format="", ln=CFG_SITE_LANG): if not recid: ## Let's obtain the recid from the docid if docid: try: bibdoc = BibDoc(docid=docid) recid = bibdoc.get_recid() except InvenioWebSubmitFileError, e: return warningMsg(_("An error has happened in trying to retrieve the requested file."), req, CFG_SITE_NAME, ln) else: return warningMsg(_('Not enough information to retrieve the document'), req, CFG_SITE_NAME, ln) else: if not name and docid: ## Let's obtain the name from the docid try: bibdoc = BibDoc(docid) name = bibdoc.get_docname() except InvenioWebSubmitFileError, e: return warningMsg(_("An error has happened in trying to retrieving the requested file."), req, CFG_SITE_NAME, ln) format = normalize_format(format) redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, format, ln, version and 'version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY) return _getfile_py(req, **args) # -------------------------------------------------- from invenio.websubmit_engine import home, action, interface, endaction, makeCataloguesTable class WebInterfaceSubmitPages(WebInterfaceDirectory): _exports = ['summary', 'sub', 'direct', '', 'attachfile', 'uploadfile', \ 'getuploadedfile', 'managedocfiles', 'managedocfilesasync', \ 'upload_video', ('continue', 'continue_')] def managedocfiles(self, req, form): """ Display admin interface to manage files of a record """ argd = wash_urlargd(form, { 'ln': (str, ''), 'access': (str, ''), 'recid': (int, None), 'do': (int, 0), 'cancel': (str, None), }) _ = gettext_set_language(argd['ln']) uid = getUid(req) user_info = collect_user_info(req) # Check authorization (auth_code, auth_msg) = acc_authorize_action(req, 'runbibdocfile') if auth_code and user_info['email'] == 'guest': # Ask to login target = '/youraccount/login' + \ make_canonical_urlargd({'ln' : argd['ln'], 'referer' : CFG_SITE_SECURE_URL + user_info['uri']}, {}) return redirect_to_url(req, target) elif 
auth_code: return page_not_authorized(req, referer="/submit/managedocfiles", uid=uid, text=auth_msg, ln=argd['ln'], navmenuid="admin") # Prepare navtrail navtrail = '''Admin Area > %(manage_files)s''' \ % {'CFG_SITE_URL': CFG_SITE_URL, 'manage_files': _("Manage Document Files")} body = '' if argd['do'] != 0 and not argd['cancel']: # Apply modifications working_dir = os.path.join(CFG_TMPSHAREDDIR, 'websubmit_upload_interface_config_' + str(uid), argd['access']) move_uploaded_files_to_storage(working_dir=working_dir, recid=argd['recid'], icon_sizes=['180>','700>'], create_icon_doctypes=['*'], force_file_revision=False) # Clean temporary directory shutil.rmtree(working_dir) # Confirm modifications body += '
<p>%s</p>
    ' % \ (_('Your modifications to record #%i have been submitted') % argd['recid']) elif argd['cancel']: # Clean temporary directory working_dir = os.path.join(CFG_TMPSHAREDDIR, 'websubmit_upload_interface_config_' + str(uid), argd['access']) shutil.rmtree(working_dir) body += '
<p>%s</p>
    ' % \ (_('Your modifications to record #%i have been cancelled') % argd['recid']) if not argd['recid'] or argd['do'] != 0: body += '''
    ''' % {'edit': _('Edit'), 'edit_record': _('Edit record'), 'CFG_SITE_URL': CFG_SITE_URL} access = time.strftime('%Y%m%d_%H%M%S') if argd['recid'] and argd['do'] == 0: # Displaying interface to manage files # Prepare navtrail title, description, keywords = websearch_templates.tmpl_record_page_header_content(req, argd['recid'], argd['ln']) navtrail = '''Admin Area > %(manage_files)s > %(record)s: %(title)s ''' \ % {'CFG_SITE_URL': CFG_SITE_URL, 'title': title, 'manage_files': _("Document File Manager"), 'record': _("Record #%i") % argd['recid']} - # FIXME: add parameters to `runbibdocfile' in order to - # configure the file editor based on role, or at least - # move configuration below to some config file. body += create_file_upload_interface(\ recid=argd['recid'], ln=argd['ln'], - doctypes_and_desc=[('Main', 'Main document'), - ('LaTeX', 'LaTeX'), - ('Source', 'Source'), - ('Additional', 'Additional File'), - ('Audio', 'Audio file'), - ('Video', 'Video file'), - ('Script', 'Script'), - ('Data', 'Data'), - ('Figure', 'Figure'), - ('Schema', 'Schema'), - ('Graph', 'Graph'), - ('Image', 'Image'), - ('Drawing', 'Drawing'), - ('Slides', 'Slides')], - can_revise_doctypes=['*'], - can_comment_doctypes=['*'], - can_describe_doctypes=['*'], - can_delete_doctypes=['*'], - can_keep_doctypes=['*'], - can_rename_doctypes=['*'], - can_add_format_to_doctypes=['*'], - can_restrict_doctypes=['*'], - restrictions_and_desc=[('', 'Public'), - ('restricted', 'Restricted')], uid=uid, sbm_access=access, - display_hidden_files=True)[1] + display_hidden_files=True, + restrictions_and_desc=CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_RESTRICTIONS, + doctypes_and_desc=CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_DOCTYPES, + **CFG_WEBSUBMIT_DOCUMENT_FILE_MANAGER_MISC)[1] body += '''
    ''' % \ {'apply_changes': _("Apply changes"), 'cancel_changes': _("Cancel all changes"), 'recid': argd['recid'], 'access': access, 'ln': argd['ln'], 'CFG_SITE_URL': CFG_SITE_URL} body += websubmit_templates.tmpl_page_do_not_leave_submission_js(argd['ln'], enabled=True) return page(title = _("Document File Manager") + (argd['recid'] and (': ' + _("Record #%i") % argd['recid']) or ''), navtrail=navtrail, navtrail_append_title_p=0, metaheaderadd = get_upload_file_interface_javascript(form_url_params='?access='+access) + \ get_upload_file_interface_css(), body = body, uid = uid, language=argd['ln'], req=req, navmenuid='admin') def managedocfilesasync(self, req, form): "Upload file and returns upload interface" argd = wash_urlargd(form, { 'ln': (str, ''), 'recid': (int, 1), 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), }) user_info = collect_user_info(req) include_headers = False # User submitted either through WebSubmit, or admin interface. if form.has_key('doctype') and form.has_key('indir') \ and form.has_key('access'): # Submitted through WebSubmit. Check rights include_headers = True working_dir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) try: assert(working_dir == os.path.abspath(working_dir)) except AssertionError: raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) try: # Retrieve recid from working_dir, safer. recid_fd = file(os.path.join(working_dir, 'SN')) recid = int(recid_fd.read()) recid_fd.close() except: recid = "" try: act_fd = file(os.path.join(working_dir, 'act')) action = act_fd.read() act_fd.close() except: action = "" # Is user authorized to perform this action? (auth_code, auth_msg) = acc_authorize_action(user_info, "submit", authorized_if_no_roles=not isGuestUser(getUid(req)), doctype=argd['doctype'], act=action) if not acc_is_role("submit", doctype=argd['doctype'], act=action): # There is NO authorization plugged. User should have access auth_code = 0 else: # User must be allowed to attach files (auth_code, auth_msg) = acc_authorize_action(user_info, 'runbibdocfile') recid = argd['recid'] if auth_code: raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) return create_file_upload_interface(recid=recid, ln=argd['ln'], print_outside_form_tag=False, print_envelope=False, form=form, include_headers=include_headers, sbm_indir=argd['indir'], sbm_access=argd['access'], sbm_doctype=argd['doctype'], uid=user_info['uid'])[1] def uploadfile(self, req, form): """ Similar to /submit, but only consider files. Nice for asynchronous Javascript uploads. Should be used to upload a single file. Also try to create an icon, and return URL to file(s) + icon(s) Authentication is performed based on session ID passed as parameter instead of cookie-based authentication, due to the use of this URL by the Flash plugin (to upload multiple files at once), which does not route cookies. FIXME: consider adding /deletefile and /modifyfile functions + parsing of additional parameters to rename files, add comments, restrictions, etc. 
""" argd = wash_urlargd(form, { 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), 'session_id': (str, ''), 'rename': (str, ''), }) curdir = None if not form.has_key("indir") or \ not form.has_key("doctype") or \ not form.has_key("access"): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) else: curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) user_info = collect_user_info(req) if form.has_key("session_id"): # Are we uploading using Flash, which does not transmit # cookie? The expect to receive session_id as a form # parameter. First check that IP addresses do not # mismatch. A ValueError will be raises if there is # something wrong session = get_session(req=req, sid=argd['session_id']) try: session = get_session(req=req, sid=argd['session_id']) except ValueError, e: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Retrieve user information. We cannot rely on the session here. res = run_sql("SELECT uid FROM session WHERE session_key=%s", (argd['session_id'],)) if len(res): uid = res[0][0] user_info = collect_user_info(uid) try: act_fd = file(os.path.join(curdir, 'act')) action = act_fd.read() act_fd.close() except: action = "" # Is user authorized to perform this action? (auth_code, auth_message) = acc_authorize_action(uid, "submit", authorized_if_no_roles=not isGuestUser(uid), verbose=0, doctype=argd['doctype'], act=action) if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0: # User cannot submit raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) else: # Process the upload and get the response added_files = {} for key, formfields in form.items(): filename = key.replace("[]", "") file_to_open = os.path.join(curdir, filename) if hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.abspath(os.path.join(curdir, 'files', str(user_info['uid']), key)) try: assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. 
DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist n = 1 while os.path.exists(os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension # This may be dangerous if the file size is bigger than the available memory fp = open(os.path.join(dir_to_open, filename), "w") fp.write(formfields.file.read()) fp.close() fp = open(os.path.join(curdir, "lastuploadedfile"), "w") fp.write(filename) fp.close() fp = open(file_to_open, "w") fp.write(filename) fp.close() try: # Create icon (icon_path, icon_name) = create_icon( { 'input-file' : os.path.join(dir_to_open, filename), 'icon-name' : filename, # extension stripped automatically 'icon-file-format' : 'gif', 'multipage-icon' : False, 'multipage-icon-delay' : 100, 'icon-scale' : "300>", # Resize only if width > 300 'verbosity' : 0, }) icons_dir = os.path.join(os.path.join(curdir, 'icons', str(user_info['uid']), key)) if not os.path.exists(icons_dir): # Create uid/icons dir if needed try: os.makedirs(icons_dir) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that # directory already exists, # then continue, else report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) os.rename(os.path.join(icon_path, icon_name), os.path.join(icons_dir, icon_name)) added_files[key] = {'name': filename, 'iconName': icon_name} except InvenioWebSubmitIconCreatorError, e: # We could not create the icon added_files[key] = {'name': filename} continue else: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Send our response if CFG_JSON_AVAILABLE: return json.dumps(added_files) def upload_video(self, req, form): """ A clone of uploadfile but for (large) videos. Does not copy the uploaded file to the websubmit directory. Instead, the path to the file is stored inside the submission directory. """ def gcd(a,b): """ the euclidean algorithm """ while a: a, b = b%a, a return b from invenio.bibencode_extract import extract_frames from invenio.bibencode_config import CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME from invenio.bibencode_encode import determine_aspect from invenio.bibencode_utils import probe from invenio.bibencode_metadata import ffprobe_metadata from invenio.websubmit_config import CFG_WEBSUBMIT_TMP_VIDEO_PREFIX argd = wash_urlargd(form, { 'doctype': (str, ''), 'access': (str, ''), 'indir': (str, ''), 'session_id': (str, ''), 'rename': (str, ''), }) curdir = None if not form.has_key("indir") or \ not form.has_key("doctype") or \ not form.has_key("access"): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) else: curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access']) user_info = collect_user_info(req) if form.has_key("session_id"): # Are we uploading using Flash, which does not transmit # cookie? The expect to receive session_id as a form # parameter. First check that IP addresses do not # mismatch. A ValueError will be raises if there is # something wrong session = get_session(req=req, sid=argd['session_id']) try: session = get_session(req=req, sid=argd['session_id']) except ValueError, e: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) # Retrieve user information. We cannot rely on the session here. 
res = run_sql("SELECT uid FROM session WHERE session_key=%s", (argd['session_id'],)) if len(res): uid = res[0][0] user_info = collect_user_info(uid) try: act_fd = file(os.path.join(curdir, 'act')) action = act_fd.read() act_fd.close() except: act = "" # Is user authorized to perform this action? (auth_code, auth_message) = acc_authorize_action(uid, "submit", authorized_if_no_roles=not isGuestUser(uid), verbose=0, doctype=argd['doctype'], act=action) if acc_is_role("submit", doctype=argd['doctype'], act=action) and auth_code != 0: # User cannot submit raise apache.SERVER_RETURN(apache.HTTP_UNAUTHORIZED) else: # Process the upload and get the response json_response = {} for key, formfields in form.items(): filename = key.replace("[]", "") if hasattr(formfields, "filename") and formfields.filename: dir_to_open = os.path.abspath(os.path.join(curdir, 'files', str(user_info['uid']), key)) try: assert(dir_to_open.startswith(CFG_WEBSUBMIT_STORAGEDIR)) except AssertionError: register_exception(req=req, prefix='curdir="%s", key="%s"' % (curdir, key)) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) if not os.path.exists(dir_to_open): try: os.makedirs(dir_to_open) except OSError, e: if e.errno != errno.EEXIST: # If the issue is only that directory # already exists, then continue, else # report register_exception(req=req, alert_admin=True) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) filename = formfields.filename ## Before saving the file to disc, wash the filename (in particular ## washing away UNIX and Windows (e.g. DFS) paths): filename = os.path.basename(filename.split('\\')[-1]) filename = filename.strip() if filename != "": # Check that file does not already exist while os.path.exists(os.path.join(dir_to_open, filename)): #dirname, basename, extension = decompose_file(new_destination_path) basedir, name, extension = decompose_file(filename) new_name = propose_next_docname(name) filename = new_name + extension #-------------# # VIDEO STUFF # #-------------# ## Remove all previous uploads filelist = os.listdir(os.path.split(formfields.file.name)[0]) for afile in filelist: if argd['access'] in afile: os.remove(os.path.join(os.path.split(formfields.file.name)[0], afile)) ## Check if the file is a readable video ## We must exclude all image and audio formats that are readable by ffprobe if (os.path.splitext(filename)[1] in ['jpg', 'jpeg', 'gif', 'tiff', 'bmp', 'png', 'tga', 'jp2', 'j2k', 'jpf', 'jpm', 'mj2', 'biff', 'cgm', 'exif', 'img', 'mng', 'pic', 'pict', 'raw', 'wmf', 'jpe', 'jif', 'jfif', 'jfi', 'tif', 'webp', 'svg', 'ai', 'ps', 'psd', 'wav', 'mp3', 'pcm', 'aiff', 'au', 'flac', 'wma', 'm4a', 'wv', 'oga', 'm4a', 'm4b', 'm4p', 'm4r', 'aac', 'mp4', 'vox', 'amr', 'snd'] or not probe(formfields.file.name)): formfields.file.close() raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## We have no "delete" attribute in Python 2.4 if sys.hexversion < 0x2050000: ## We need to rename first and create a dummy file ## Rename the temporary file for the garbage collector new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1] os.rename(formfields.file.name, new_tmp_fullpath) dummy = open(formfields.file.name, "w") dummy.close() formfields.file.close() else: # Mark the NamedTemporatyFile as not to be deleted formfields.file.delete = False formfields.file.close() ## Rename the temporary file for the garbage collector new_tmp_fullpath = os.path.split(formfields.file.name)[0] + "/" + 
CFG_WEBSUBMIT_TMP_VIDEO_PREFIX + argd['access'] + "_" + os.path.split(formfields.file.name)[1] os.rename(formfields.file.name, new_tmp_fullpath) # Write the path to the temp file to a file in STORAGEDIR fp = open(os.path.join(dir_to_open, "filepath"), "w") fp.write(new_tmp_fullpath) fp.close() fp = open(os.path.join(dir_to_open, "filename"), "w") fp.write(filename) fp.close() ## We are going to extract some thumbnails for websubmit ## sample_dir = os.path.join(curdir, 'files', str(user_info['uid']), CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR) try: ## Remove old thumbnails shutil.rmtree(sample_dir) except OSError: register_exception(req=req, alert_admin=False) try: os.makedirs(os.path.join(curdir, 'files', str(user_info['uid']), sample_dir)) except OSError: register_exception(req=req, alert_admin=False) try: extract_frames(input_file=new_tmp_fullpath, output_file=os.path.join(sample_dir, CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME), size="600x600", numberof=5) json_response['frames'] = [] for extracted_frame in os.listdir(sample_dir): json_response['frames'].append(extracted_frame) except: ## If the frame extraction fails, something was bad with the video os.remove(new_tmp_fullpath) register_exception(req=req, alert_admin=False) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## Try to detect the aspect. if this fails, the video is not readable ## or a wrong file might have been uploaded try: (aspect, width, height) = determine_aspect(new_tmp_fullpath) if aspect: aspx, aspy = aspect.split(':') else: the_gcd = gcd(width, height) aspx = str(width / the_gcd) aspy = str(height / the_gcd) json_response['aspx'] = aspx json_response['aspy'] = aspy except TypeError: ## If the aspect detection completely fails os.remove(new_tmp_fullpath) register_exception(req=req, alert_admin=False) raise apache.SERVER_RETURN(apache.HTTP_FORBIDDEN) ## Try to extract some metadata from the video container metadata = ffprobe_metadata(new_tmp_fullpath) json_response['meta_title'] = metadata['format'].get('TAG:title') json_response['meta_description'] = metadata['format'].get('TAG:description') json_response['meta_year'] = metadata['format'].get('TAG:year') json_response['meta_author'] = metadata['format'].get('TAG:author') ## Empty file name else: raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) ## We found our file, we can break the loop break; # Send our response if CFG_JSON_AVAILABLE: dumped_response = json.dumps(json_response) # store the response in the websubmit directory # this is needed if the submission is not finished and continued later response_dir = os.path.join(curdir, 'files', str(user_info['uid']), "response") try: os.makedirs(response_dir) except OSError: # register_exception(req=req, alert_admin=False) pass fp = open(os.path.join(response_dir, "response"), "w") fp.write(dumped_response) fp.close() return dumped_response def getuploadedfile(self, req, form): """ Stream uploaded files. For the moment, restrict to files in ./curdir/files/uid or ./curdir/icons/uid directory, so that we are sure we stream files only to the user who uploaded them. 
""" argd = wash_urlargd(form, {'indir': (str, None), 'doctype': (str, None), 'access': (str, None), 'icon': (int, 0), 'key': (str, None), 'filename': (str, None), 'nowait': (int, 0)}) if None in argd.values(): raise apache.SERVER_RETURN(apache.HTTP_BAD_REQUEST) uid = getUid(req) if argd['icon']: file_path = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access'], 'icons', str(uid), argd['key'], argd['filename'] ) else: file_path = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, argd['indir'], argd['doctype'], argd['access'], 'files', str(uid), argd['key'], argd['filename'] ) abs_file_path = os.path.abspath(file_path) if abs_file_path.startswith(CFG_WEBSUBMIT_STORAGEDIR): # Check if file exist. Note that icon might not yet have # been created. if not argd['nowait']: for i in range(5): if os.path.exists(abs_file_path): return stream_file(req, abs_file_path) time.sleep(1) else: if os.path.exists(abs_file_path): return stream_file(req, abs_file_path) # Send error 404 in all other cases raise apache.SERVER_RETURN(apache.HTTP_NOT_FOUND) def attachfile(self, req, form): """ Process requests received from CKEditor to upload files. If the uploaded file is an image, create an icon version """ if not is_html_text_editor_installed(): return apache.HTTP_NOT_FOUND if not form.has_key('type'): form['type'] = 'File' if not form.has_key('upload') or \ not form['type'] in \ ['File', 'Image', 'Flash', 'Media']: #return apache.HTTP_NOT_FOUND pass filetype = form['type'].lower() uid = getUid(req) # URL where the file can be fetched after upload user_files_path = '%(CFG_SITE_URL)s/submit/getattachedfile/%(uid)s' % \ {'uid': uid, 'CFG_SITE_URL': CFG_SITE_URL, 'filetype': filetype} # Path to directory where uploaded files are saved user_files_absolute_path = '%(CFG_PREFIX)s/var/tmp/attachfile/%(uid)s/%(filetype)s' % \ {'uid': uid, 'CFG_PREFIX': CFG_PREFIX, 'filetype': filetype} try: os.makedirs(user_files_absolute_path) except: pass user_info = collect_user_info(req) (auth_code, auth_message) = acc_authorize_action(user_info, 'attachsubmissionfile') msg = "" if user_info['email'] == 'guest': # User is guest: must login prior to upload msg = 'Please login before uploading file.' elif auth_code: # User cannot submit msg = 'Sorry, you are not allowed to submit files.' 
## elif len(form['upload']) != 1: ## msg = 'Sorry, you must upload one single file' else: # Process the upload and get the response (msg, uploaded_file_path, uploaded_file_name, uploaded_file_url, callback_function) = \ process_CKEditor_upload(form, uid, user_files_path, user_files_absolute_path) if uploaded_file_path: # Create an icon if form.get('type','') == 'Image': try: (icon_path, icon_name) = create_icon( { 'input-file' : uploaded_file_path, 'icon-name' : os.path.splitext(uploaded_file_name)[0], 'icon-file-format' : os.path.splitext(uploaded_file_name)[1][1:] or 'gif', 'multipage-icon' : False, 'multipage-icon-delay' : 100, 'icon-scale' : "300>", # Resize only if width > 300 'verbosity' : 0, }) # Move original file to /original dir, and replace it with icon file original_user_files_absolute_path = os.path.join(user_files_absolute_path, 'original') if not os.path.exists(original_user_files_absolute_path): # Create /original dir if needed os.mkdir(original_user_files_absolute_path) os.rename(uploaded_file_path, original_user_files_absolute_path + os.sep + uploaded_file_name) os.rename(icon_path + os.sep + icon_name, uploaded_file_path) except InvenioWebSubmitIconCreatorError, e: pass user_files_path += '/' + filetype + '/' + uploaded_file_name else: user_files_path = '' if not msg: msg = 'No valid file found' # Send our response send_response(req, msg, user_files_path, callback_function) def _lookup(self, component, path): """ This handler is invoked for the dynamic URLs (for getting and putting attachments) Eg: /submit/getattachedfile/41336978/image/myfigure.png /submit/attachfile/41336978/image/myfigure.png """ if component == 'getattachedfile' and len(path) > 2: uid = path[0] # uid of the submitter file_type = path[1] # file, image, flash or media (as # defined by CKEditor) if file_type in ['file', 'image', 'flash', 'media']: file_name = '/'.join(path[2:]) # the filename def answer_get(req, form): """Accessing files attached to submission.""" form['file'] = file_name form['type'] = file_type form['uid'] = uid return self.getattachedfile(req, form) return answer_get, [] # All other cases: file not found return None, [] def getattachedfile(self, req, form): """ Returns a file uploaded to the submission 'drop box' by the CKEditor. """ argd = wash_urlargd(form, {'file': (str, None), 'type': (str, None), 'uid': (int, 0)}) # Can user view this record, i.e. can user access its # attachments? uid = getUid(req) user_info = collect_user_info(req) if not argd['file'] is None: # Prepare path to file on disk. Normalize the path so that # ../ and other dangerous components are removed. path = os.path.abspath(CFG_PREFIX + '/var/tmp/attachfile/' + \ '/' + str(argd['uid']) + \ '/' + argd['type'] + '/' + argd['file']) # Check that we are really accessing attachements # directory, for the declared record. if path.startswith(CFG_PREFIX + '/var/tmp/attachfile/') and os.path.exists(path): return stream_file(req, path) # Send error 404 in all other cases return(apache.HTTP_NOT_FOUND) def continue_(self, req, form): """ Continue an interrupted submission. 
""" args = wash_urlargd(form, {'access': (str, ''), 'doctype': (str, '')}) ln = args['ln'] _ = gettext_set_language(ln) access = args['access'] doctype = args['doctype'] if not access or not doctype: return warningMsg(_("Sorry, invalid arguments"), req=req, ln=ln) user_info = collect_user_info(req) email = user_info['email'] res = run_sql("SELECT action, status FROM sbmSUBMISSIONS WHERE id=%s AND email=%s and doctype=%s", (access, email, doctype)) if res: action, status = res[0] if status == 'finished': return warningMsg(_("Note: the requested submission has already been completed"), req=req, ln=ln) redirect_to_url(req, CFG_SITE_SECURE_URL + '/submit/direct?' + urlencode({ 'sub': action + doctype, 'access': access})) return warningMsg(_("Sorry, you don't seem to have initiated a submission with the provided access number"), req=req, ln=ln) def direct(self, req, form): """Directly redirected to an initialized submission.""" args = wash_urlargd(form, {'sub': (str, ''), 'access' : (str, '')}) sub = args['sub'] access = args['access'] ln = args['ln'] _ = gettext_set_language(ln) uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "direct", navmenuid='submit') myQuery = req.args if not sub: return warningMsg(_("Sorry, 'sub' parameter missing..."), req, ln=ln) res = run_sql("SELECT docname,actname FROM sbmIMPLEMENT WHERE subname=%s", (sub,)) if not res: return warningMsg(_("Sorry. Cannot analyse parameter"), req, ln=ln) else: # get document type doctype = res[0][0] # get action name action = res[0][1] # retrieve other parameter values params = dict(form) # find existing access number if not access: # create 'unique' access number pid = os.getpid() now = time.time() access = "%i_%s" % (now,pid) # retrieve 'dir' value res = run_sql ("SELECT dir FROM sbmACTION WHERE sactname=%s", (action,)) dir = res[0][0] mainmenu = req.headers_in.get('referer') params['access'] = access params['act'] = action params['doctype'] = doctype params['startPg'] = '1' params['mainmenu'] = mainmenu params['ln'] = ln params['indir'] = dir url = "%s/submit?%s" % (CFG_SITE_SECURE_URL, urlencode(params)) redirect_to_url(req, url) def sub(self, req, form): """DEPRECATED: /submit/sub is deprecated now, so raise email to the admin (but allow submission to continue anyway)""" args = wash_urlargd(form, {'password': (str, '')}) uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../sub/", navmenuid='submit') try: raise DeprecationWarning, 'submit/sub handler has been used. Please use submit/direct. e.g. 
"submit/sub?RN=123@SBIFOO" -> "submit/direct?RN=123&sub=SBIFOO"' except DeprecationWarning: register_exception(req=req, alert_admin=True) ln = args['ln'] _ = gettext_set_language(ln) #DEMOBOO_RN=DEMO-BOOK-2008-001&ln=en&password=1223993532.26572%40APPDEMOBOO params = dict(form) password = args['password'] if password: del params['password'] if "@" in password: params['access'], params['sub'] = password.split('@', 1) else: params['sub'] = password else: args = str(req.args).split('@') if len(args) > 1: params = {'sub' : args[-1]} args = '@'.join(args[:-1]) params.update(cgi.parse_qs(args)) else: return warningMsg(_("Sorry, invalid URL..."), req, ln=ln) url = "%s/submit/direct?%s" % (CFG_SITE_SECURE_URL, urlencode(params, doseq=True)) redirect_to_url(req, url) def summary(self, req, form): args = wash_urlargd(form, { 'doctype': (str, ''), 'act': (str, ''), 'access': (str, ''), 'indir': (str, '')}) uid = getUid(req) if uid == -1 or CFG_ACCESS_CONTROL_LEVEL_SITE >= 1: return page_not_authorized(req, "../summary", navmenuid='submit') t="" curdir = os.path.join(CFG_WEBSUBMIT_STORAGEDIR, args['indir'], args['doctype'], args['access']) try: assert(curdir == os.path.abspath(curdir)) except AssertionError: register_exception(req=req, alert_admin=True, prefix='Possible cracking tentative: indir="%s", doctype="%s", access="%s"' % (args['indir'], args['doctype'], args['access'])) return warningMsg("Invalid parameters", req) subname = "%s%s" % (args['act'], args['doctype']) res = run_sql("select sdesc,fidesc,pagenb,level from sbmFIELD where subname=%s " "order by pagenb,fieldnb", (subname,)) nbFields = 0 values = [] for arr in res: if arr[0] != "": val = { 'mandatory' : (arr[3] == 'M'), 'value' : '', 'page' : arr[2], 'name' : arr[0], } if os.path.exists(os.path.join(curdir, curdir,arr[1])): fd = open(os.path.join(curdir, arr[1]),"r") value = fd.read() fd.close() value = value.replace("\n"," ") value = value.replace("Select:","") else: value = "" val['value'] = value values.append(val) return websubmit_templates.tmpl_submit_summary( ln = args['ln'], values = values, ) def index(self, req, form): args = wash_urlargd(form, { 'c': (str, CFG_SITE_NAME), 'doctype': (str, ''), 'act': (str, ''), 'startPg': (str, "1"), 'access': (str, ''), 'mainmenu': (str, ''), 'fromdir': (str, ''), 'nextPg': (str, ''), 'nbPg': (str, ''), 'curpage': (str, '1'), 'step': (str, '0'), 'mode': (str, 'U'), }) ## Strip whitespace from beginning and end of doctype and action: args["doctype"] = args["doctype"].strip() args["act"] = args["act"].strip() def _index(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage, step, mode): auth_args = {} if doctype: auth_args['doctype'] = doctype if act: auth_args['act'] = act uid = getUid(req) if CFG_CERN_SITE: ## HACK BEGIN: this is a hack for CMS and ATLAS draft from invenio.webuser import collect_user_info user_info = collect_user_info(req) if doctype == 'CMSPUB' and act == "" and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch': if 'cms-publication-committee-chair [CERN]' not in user_info['group']: return page_not_authorized(req, "../submit", text="In order to access this submission interface you need to be member of the CMS Publication Committee Chair.", navmenuid='submit') elif doctype == 'ATLPUB' and 'cds-admin [CERN]' not in user_info['group'] and not user_info['email'].lower() == 'cds.support@cern.ch': if 'atlas-gen [CERN]' not in user_info['group']: return page_not_authorized(req, "../submit", text="In 
order to access this submission interface you need to be member of ATLAS.", navmenuid='submit') ## HACK END if doctype == "": catalogues_text, at_least_one_submission_authorized, submission_exists= makeCataloguesTable(req, ln=CFG_SITE_LANG) if not at_least_one_submission_authorized and submission_exists: if isGuestUser(uid): return redirect_to_url(req, "%s/youraccount/login%s" % ( CFG_SITE_SECURE_URL, make_canonical_urlargd({'referer' : CFG_SITE_SECURE_URL + req.unparsed_uri, 'ln' : args['ln']}, {})) , norobot=True) else: return page_not_authorized(req, "../submit", uid=uid, navmenuid='submit') return home(req,catalogues_text, c,ln) elif act == "": return action(req,c,ln,doctype) elif int(step)==0: return interface(req, c, ln, doctype, act, startPg, access, mainmenu, fromdir, nextPg, nbPg, curpage) else: return endaction(req, c, ln, doctype, act, startPg, access,mainmenu, fromdir, nextPg, nbPg, curpage, step, mode) return _index(req, **args) # Answer to both /submit/ and /submit __call__ = index def errorMsg(title, req, c=None, ln=CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) if c is None: c = CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME) return page(title = _("Error"), body = create_error_box(req, title=str(title), verbose=0, ln=ln), description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='submit') def warningMsg(title, req, c=None, ln=CFG_SITE_LANG): # load the right message language _ = gettext_set_language(ln) if c is None: c = CFG_SITE_NAME_INTL.get(ln, CFG_SITE_NAME) return page(title = _("Warning"), body = title, description="%s - Internal Error" % c, keywords="%s, Internal Error" % c, uid = getUid(req), language=ln, req=req, navmenuid='submit') def print_warning(msg, type='', prologue='
<br />', epilogue='<br />
    '): """Prints warning message and flushes output.""" if msg: return websubmit_templates.tmpl_print_warning( msg = msg, type = type, prologue = prologue, epilogue = epilogue, ) else: return '' ## def retrieve_most_recent_attached_file(file_path): ## """ ## Retrieve the latest file that has been uploaded with the ## CKEditor. This is the only way to retrieve files that the ## CKEditor has renamed after the upload. ## Eg: 'prefix/image.jpg' was uploaded but did already ## exist. CKEditor silently renamed it to 'prefix/image(1).jpg': ## >>> retrieve_most_recent_attached_file('prefix/image.jpg') ## 'prefix/image(1).jpg' ## """ ## (base_path, filename) = os.path.split(file_path) ## base_name = os.path.splitext(filename)[0] ## file_ext = os.path.splitext(filename)[1][1:] ## most_recent_filename = filename ## i = 0 ## while True: ## i += 1 ## possible_filename = "%s(%d).%s" % \ ## (base_name, i, file_ext) ## if os.path.exists(base_path + os.sep + possible_filename): ## most_recent_filename = possible_filename ## else: ## break ## return os.path.join(base_path, most_recent_filename)
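## Note: the uploadfile() and upload_video() handlers above avoid
## overwriting an existing upload by repeatedly proposing a new document
## name until a free one is found. Here is a minimal standalone sketch of
## that loop; the helper name find_free_filename is hypothetical and only
## assumes the bibdocfile helpers behave as they are used above.

import os
from invenio.bibdocfile import decompose_file, propose_next_docname

def find_free_filename(dir_to_open, filename):
    """Return a variant of `filename' that does not yet exist inside
    `dir_to_open', mirroring the collision loop used by uploadfile()
    and upload_video() above."""
    while os.path.exists(os.path.join(dir_to_open, filename)):
        ## decompose_file() splits e.g. 'figure.png' into
        ## ('', 'figure', '.png'); propose_next_docname() then proposes
        ## the next candidate name, e.g. 'figure_1'.
        basedir, name, extension = decompose_file(filename)
        filename = propose_next_docname(name) + extension
    return filename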