diff --git a/modules/bibformat/lib/elements/bfe_fulltext.py b/modules/bibformat/lib/elements/bfe_fulltext.py index fb3dd63c4..8b3eca658 100644 --- a/modules/bibformat/lib/elements/bfe_fulltext.py +++ b/modules/bibformat/lib/elements/bfe_fulltext.py @@ -1,273 +1,274 @@ # -*- coding: utf-8 -*- ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints a links to fulltext """ __revision__ = "$Id$" import re from invenio.bibdocfile import BibRecDocs, file_strip_ext from invenio.messages import gettext_set_language from invenio.config import CFG_SITE_URL, CFG_CERN_SITE from cgi import escape from urlparse import urlparse from os.path import basename import urllib cern_arxiv_categories = ["astro-ph", "chao-dyn", "cond-mat", "gr-qc", "hep-ex", "hep-lat", "hep-ph", "hep-th", "math-ph", "math", "nucl-ex", "nucl-th", "out", "physics", "quant-ph", "q-alg", "cs", "adap-org", "comp-gas", "chem-ph", "cs", "math", "neuro-sys", "patt-sol", "solv-int", "acc-phys", "alg-geom", "ao-sci", "atom-ph", "cmp-lg", "dg-ga", "funct-an", "mtrl-th", "plasm-ph", "q-alg", "supr-con"] def format(bfo, style, separator='; ', show_icons='no'): """ This is the default format for formatting fulltext links. When possible, it returns only the main file(s) (+ link to additional files if needed). If no distinction is made at submission time between main and additional files, returns all the files @param separator: the separator between urls. @param style: CSS class of the link @param show_icons: if 'yes', print icons for fulltexts """ _ = gettext_set_language(bfo.lang) out = '' # Retrieve files (parsed_urls, old_versions, additionals) = get_files(bfo) main_urls = parsed_urls['main_urls'] others_urls = parsed_urls['others_urls'] if parsed_urls.has_key('cern_urls'): cern_urls = parsed_urls['cern_urls'] # Prepare style and icon if style != "": style = 'class="'+style+'"' if show_icons.lower() == 'yes': file_icon = '%s' % (CFG_SITE_URL, _("Download fulltext")) else: file_icon = '' # Build urls list. # Escape special chars for tag value. additional_str = '' if additionals: additional_str = ' (%s)' % _("additional files") versions_str = '' #if old_versions: #versions_str = ' (%s)' % _("older versions") if main_urls: last_name = "" main_urls_keys = sort_alphanumerically(main_urls.keys()) for descr in main_urls_keys: urls = main_urls[descr] out += "%s: " % descr url_list = [] ## FIXME: This is so ugly! urls_dict = {} for url, name, format in urls: urls_dict[url] = (name, format) urls_dict_keys = sort_alphanumerically(urls_dict.keys()) for url in urls_dict_keys: name, format = urls_dict[url] if not name == last_name and len(main_urls) > 1: print_name = "%s - " % name else: print_name = "" last_name = name url_list.append(print_name + ''+ \ file_icon + format.upper()+'') out += separator.join(url_list) + additional_str + versions_str + '
' if CFG_CERN_SITE and cern_urls: link_word = len(cern_urls) == 1 and _('%(x_sitename)s link') or _('%(x_sitename)s links') out += '%s: ' % (link_word % {'x_sitename': 'CERN'}) url_list = [] for url, descr in cern_urls: url_list.append(''+ \ file_icon + escape(str(descr))+'') out += separator.join(url_list) if others_urls: external_link = len(others_urls) == 1 and _('external link') or _('external links') out += '%s: ' % external_link.capitalize() url_list = [] for url, descr in others_urls: url_list.append(''+ \ file_icon + escape(str(descr))+'') out += separator.join(url_list) + '
' if out.endswith('
'): out = out[:-len('
')] # When exported to text (eg. in WebAlert emails) we do not want to # display the link to the fulltext: if out: out = '' + out + '' return out def escape_values(bfo): """ Called by BibFormat in order to check if output of this element should be escaped. """ return 0 def get_files(bfo): """ Returns the files available for the given record. Returned structure is a tuple (parsed_urls, old_versions, additionals): - parsed_urls: contains categorized URLS (see details below) - old_versions: set to True if we can have access to old versions - additionals: set to True if we have other documents than the 'main' document 'parsed_urls' is a dictionary in the form: {'main_urls' : {'Main' : [('http://CFG_SITE_URL/record/1/files/aFile.pdf', 'aFile', 'PDF'), ('http://CFG_SITE_URL/record/1/files/aFile.gif', 'aFile', 'GIF')], 'Additional': [('http://CFG_SITE_URL/record/1/files/bFile.pdf', 'bFile', 'PDF')]}, 'other_urls': [('http://externalurl.com/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y) ('http://externalurl.com/bFile.pdf', 'Fulltext')], 'cern_urls' : [('http://cern.ch/aFile.pdf', 'Fulltext'), # url(8564_u), description(8564_z/y) ('http://cern.ch/bFile.pdf', 'Fulltext')], } Some notes about returned structure: - key 'cern_urls' is only available on CERN site - keys in main_url dictionaries are defined by the BibDoc. - older versions are not part of the parsed urls - returns only main files when possible, that is when doctypes make a distinction between "Main" files and other files. Otherwise returns all the files as main. """ _ = gettext_set_language(bfo.lang) urls = bfo.fields("8564_") bibarchive = BibRecDocs(bfo.recID) old_versions = False # We can provide link to older files. Will be # set to True if older files are found. additionals = False # We have additional files. Will be set to # True if additional files are found. # Prepare object to return parsed_urls = {'main_urls':{}, # Urls hosted by Invenio (bibdocs) 'others_urls':[] # External urls } if CFG_CERN_SITE: parsed_urls['cern_urls'] = [] # cern.ch urls # Doctypes can of any type, but when there is one file marked as # 'Main', we consider that there is a distinction between "main" # and "additional" files. Otherwise they will all be considered # equally as main files distinct_main_and_additional_files = False if len(bibarchive.list_bibdocs(doctype='Main')) > 0: distinct_main_and_additional_files = True # Parse URLs for complete_url in urls: if complete_url.has_key('u'): url = complete_url['u'] (dummy, host, path, dummy, params, dummy) = urlparse(url) filename = urllib.unquote(basename(path)) name = file_strip_ext(filename) format = filename[len(name):] if format.startswith('.'): format = format[1:] descr = '' if complete_url.has_key('y'): descr = complete_url['y'] if not url.startswith(CFG_SITE_URL): # Not a bibdoc? if not descr: # For not bibdoc let's have a description # Display the URL in full: descr = url if CFG_CERN_SITE and 'cern.ch' in host and \ ('/setlink?' in url or \ 'cms' in host or \ 'documents.cern.ch' in url or \ 'doc.cern.ch' in url or \ 'preprints.cern.ch' in url): - url_params_dict = dict([part.split('=') for part in params.split('&')]) + url_params_dict = dict([part.split('=') for part in params.split('&') \ + if len(part) == 2]) if url_params_dict.has_key('categ') and \ (url_params_dict['categ'].split('.', 1)[0] in cern_arxiv_categories) and \ url_params_dict.has_key('id'): # Old arXiv links, used to be handled by # setlink. Provide direct links to arXiv for file_format, label in [('pdf', "PDF")]:#, #('ps', "PS"), #('e-print', "Source (generally TeX or LaTeX)"), #('abs', "Abstract")]: url = "http://arxiv.org/%(format)s/%(category)s/%(id)s" % \ {'format': file_format, 'category': url_params_dict['categ'], 'id': url_params_dict['id']} parsed_urls['others_urls'].append((url, "%s/%s %s" % \ (url_params_dict['categ'], url_params_dict['id'], label))) else: parsed_urls['others_urls'].append((url, descr)) # external url else: # It's a bibdoc! assigned = False for doc in bibarchive.list_bibdocs(): if int(doc.get_latest_version()) > 1: old_versions = True if True in [f.fullname.startswith(filename) \ for f in doc.list_all_files()]: assigned = True #doc.getIcon() if not doc.doctype == 'Main' and \ distinct_main_and_additional_files == True: # In that case we record that there are # additional files, but don't add them to # returned structure. additionals = True else: if not descr: descr = _('Fulltext') if not parsed_urls['main_urls'].has_key(descr): parsed_urls['main_urls'][descr] = [] parsed_urls['main_urls'][descr].append((url, name, format)) if not assigned: # Url is not a bibdoc :-S if not descr: descr = filename parsed_urls['others_urls'].append((url, descr)) # Let's put it in a general other url return (parsed_urls, old_versions, additionals) _RE_SPLIT = re.compile(r"\d+|\D+") def sort_alphanumerically(elements): elements = [([not token.isdigit() and token or int(token) for token in _RE_SPLIT.findall(element)], element) for element in elements] elements.sort() return [element[1] for element in elements]