diff --git a/modules/Makefile.am b/modules/Makefile.am index 1dd7a6696..76538a49d 100644 --- a/modules/Makefile.am +++ b/modules/Makefile.am @@ -1,22 +1,22 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment +SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment webjournal CLEANFILES = *~ diff --git a/modules/bibformat/lib/bibformat_engine.py b/modules/bibformat/lib/bibformat_engine.py index cc586b4c7..d16b33445 100644 --- a/modules/bibformat/lib/bibformat_engine.py +++ b/modules/bibformat/lib/bibformat_engine.py @@ -1,1972 +1,1973 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. 
## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Formats a single XML Marc record using specified format. There is no API for the engine. Instead use bibformat.py. SEE: bibformat.py, bibformat_utils.py """ __revision__ = "$Id$" import re import sys import os import inspect import traceback import zlib import cgi from invenio.config import \ CFG_PATH_PHP, \ bindir, \ cdslang from invenio.errorlib import \ register_errors, \ get_msgs_for_code_list from invenio.bibrecord import \ create_record, \ record_get_field_instances, \ record_get_field_value, \ record_get_field_values from invenio.bibformat_xslt_engine import format from invenio.dbquery import run_sql from invenio.messages import \ language_list_long, \ wash_language from invenio import bibformat_dblayer from invenio.bibformat_config import \ CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, \ CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, \ CFG_BIBFORMAT_TEMPLATES_PATH, \ CFG_BIBFORMAT_ELEMENTS_PATH, \ CFG_BIBFORMAT_OUTPUTS_PATH, \ CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH from invenio.bibformat_utils import \ record_get_xml, \ parse_tag from invenio.htmlutils import HTMLWasher if CFG_PATH_PHP: #Remove when call_old_bibformat is removed from xml.dom import minidom import tempfile # Cache for data we have already read and parsed format_templates_cache = {} format_elements_cache = 
{} format_outputs_cache = {} kb_mappings_cache = {} cdslangs = language_list_long() html_field = '' # String indicating that field should be # treated as HTML (and therefore no escaping of # HTML tags should occur. # Appears in some field values. washer = HTMLWasher() # Used to remove dangerous tags from HTML # sources # Regular expression for finding ... tag in format templates pattern_lang = re.compile(r''' #closing start tag (?P.*?) #anything but the next group (greedy) () #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Builds regular expression for finding each known language in tags ln_pattern_text = r"<(" for lang in cdslangs: ln_pattern_text += lang[0] +r"|" ln_pattern_text = ln_pattern_text.rstrip(r"|") ln_pattern_text += r")>(.*?)" ln_pattern = re.compile(ln_pattern_text, re.IGNORECASE | re.DOTALL) # Regular expression for finding tag in format templates pattern_format_template_name = re.compile(r''' #closing start tag (?P.*?) #name value. any char that is not end tag ()(\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tag in format templates pattern_format_template_desc = re.compile(r''' #closing start tag (?P.*?) #description value. any char that is not end tag (\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tags in format templates pattern_tag = re.compile(r''' [^/\s]+) #any char but a space or slash \s* #any number of spaces (?P(\s* #params here (?P([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s) =\s* #equality: = followed by any number of spaces (?P[\'"]) #one of the separators (?P.*?) 
#param value: any chars that is not a separator like previous one (?P=sep) #same separator as starting one )*) #many params \s* #any number of spaces (/)?> #end of the tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding params inside tags in format templates pattern_function_params = re.compile(''' (?P([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s) =\s* # Equality: = followed by any number of spaces (?P[\'"]) # One of the separators (?P.*?) # Param value: any chars that is not a separator like previous one (?P=sep) # Same separator as starting one ''', re.VERBOSE | re.DOTALL ) # Regular expression for finding format elements "params" attributes # (defined by @param) pattern_format_element_params = re.compile(''' @param\s* # Begins with @param keyword followed by space(s) (?P[^\s=]*)\s* # A single keyword, and then space(s) #(=\s*(?P[\'"]) # Equality, space(s) and then one of the separators #(?P.*?) # Default value: any chars that is not a separator like previous one #(?P=sep) # Same separator as starting one #)?\s* # Default value for param is optional. Followed by space(s) (?P.*) # Any text that is not end of line (thanks to MULTILINE parameter) ''', re.VERBOSE | re.MULTILINE) # Regular expression for finding format elements "see also" attribute # (defined by @see) pattern_format_element_seealso = re.compile('''@see\s*(?P.*)''', re.VERBOSE | re.MULTILINE) #Regular expression for finding 2 expressions in quotes, separated by #comma (as in template("1st","2nd") ) #Used when parsing output formats ## pattern_parse_tuple_in_quotes = re.compile(''' ## (?P[\'"]) ## (?P.*) ## (?P=sep1) ## \s*,\s* ## (?P[\'"]) ## (?P.*) ## (?P=sep2) ## ''', re.VERBOSE | re.MULTILINE) def call_old_bibformat(recID, format="HD", on_the_fly=False, verbose=0): """ FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE Calls BibFormat for the record RECID in the desired output format FORMAT. 
@param on_the_fly if False, try to return an already preformatted version of the record in the database Note: this functions always try to return HTML, so when bibformat returns XML with embedded HTML format inside the tag FMT $g, as is suitable for prestoring output formats, we perform un-XML-izing here in order to return HTML body only. """ out = "" res = [] if not on_the_fly: # look for formatted notice existence: query = "SELECT value, last_updated FROM bibfmt WHERE "\ "id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it if verbose == 9: last_updated = res[0][1] out += """\n
Found preformatted output for record %i (cache updated on %s). """ % (recID, last_updated) decompress = zlib.decompress return "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', # so try to call BibFormat on the fly or use default format: if verbose == 9: out += """\n
Formatting record %i on-the-fly with old BibFormat.
""" % recID # Retrieve MARCXML # Build it on-the-fly only if 'call_old_bibformat' was called # with format=xm and on_the_fly=True xm_record = record_get_xml(recID, 'xm', on_the_fly=(on_the_fly and format == 'xm')) ## import platform ## # Some problem have been found using either popen or os.system command. ## # Here is a temporary workaround until the issue is solved. ## if platform.python_compiler().find('Red Hat') > -1: ## # use os.system ## (result_code, result_path) = tempfile.mkstemp() ## command = "( %s/bibformat otype=%s ) > %s" % (bindir, format, result_path) ## (xm_code, xm_path) = tempfile.mkstemp() ## xm_file = open(xm_path, "w") ## xm_file.write(xm_record) ## xm_file.close() ## command = command + " <" + xm_path ## os.system(command) ## result_file = open(result_path,"r") ## bibformat_output = result_file.read() ## result_file.close() ## os.remove(result_path) ## os.remove(xm_path) ## else: ## # use popen pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % bindir, "otype=%s" % format], 'rw') pipe_input.write(xm_record) pipe_input.flush() pipe_input.close() bibformat_output = pipe_output.read() pipe_output.close() pipe_error.close() if bibformat_output.startswith(""): dom = minidom.parseString(bibformat_output) for e in dom.getElementsByTagName('subfield'): if e.getAttribute('code') == 'g': for t in e.childNodes: out += t.data.encode('utf-8') else: out += bibformat_output return out def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=[], xml_record=None, uid=None): """ Formats a record given output format. Main entry function of bibformat engine. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. You can either specify an record ID to format, or give its xml representation. if 'xml_record' is not None, then use it instead of recID. 
'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. @param recID the ID of record to format @param of an output format code (or short identifier for the output format) @param ln the language to use to format the record @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @param search_pattern list of strings representing the user request in web interface @param xml_record an xml string representing the record to format @param uid the user id of the person who will view the formatted page @return formatted record """ out = "" errors_ = [] # Temporary workflow (during migration of formats): # Call new BibFormat # But if format not found for new BibFormat, then call old BibFormat #Create a BibFormat Object to pass that contain record and context bfo = BibFormatObject(recID, ln, search_pattern, xml_record, uid, of) #Find out which format template to use based on record and output format. template = decide_format_template(bfo, of) if verbose == 9 and template is not None: out += """\n
Using %s template for record %i. """ % (template, recID) ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, template) if template is None or not os.access(path, os.R_OK): # template not found in new BibFormat. Call old one if verbose == 9: if template is None: out += """\n
No template found for output format %s and record %i. (Check invenio.err log file for more details) """ % (of, recID) else: out += """\n
Template %s could not be read. """ % (template) if CFG_PATH_PHP: if verbose == 9: out += """\n
Using old BibFormat for record %s. """ % recID return out + call_old_bibformat(recID, format=of, on_the_fly=True, verbose=verbose) ############################# END ################################## error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_TEMPLATE_FOUND", of)], stream='error', ln=cdslang) errors_.append(error) if verbose == 0: register_errors(error, 'error') elif verbose > 5: return out + error[0][1] return out # Format with template (out_, errors) = format_with_format_template(template, bfo, verbose) errors_.extend(errors) out += out_ return out def decide_format_template(bfo, of): """ Returns the format template name that should be used for formatting given output format and BibFormatObject. Look at of rules, and take the first matching one. If no rule matches, returns None To match we ignore lettercase and spaces before and after value of rule and value of record @param bfo a BibFormatObject @param of the code of the output format to use """ output_format = get_output_format(of) for rule in output_format['rules']: value = bfo.field(rule['field']).strip()#Remove spaces pattern = rule['value'].strip() #Remove spaces match_obj = re.match(pattern, value, re.IGNORECASE) if match_obj is not None and \ match_obj.start() == 0 and match_obj.end() == len(value): return rule['template'] template = output_format['default'] if template != '': return template else: return None def format_with_format_template(format_template_filename, bfo, verbose=0, format_template_code=None): """ Format a record given a format template. Also returns errors Returns a formatted version of the record represented by bfo, in the language specified in bfo, and with the specified format template. If format_template_code is provided, the template will not be loaded from format_template_filename (but format_template_filename will still be used to determine if bft or xsl transformation applies). This allows to preview format code without having to save file on disk. 
@param format_template_filename the dilename of a format template @param bfo the object containing parameters for the current formatting @param format_template_code if not empty, use code as template instead of reading format_template_filename (used for previews) @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (formatted text, errors) """ errors_ = [] if format_template_code is not None: format_content = str(format_template_code) else: format_content = get_format_template(format_template_filename)['code'] if format_template_filename is None or \ format_template_filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION): # .bft localized_format = filter_languages(format_content, bfo.lang) (evaluated_format, errors) = eval_format_template_elements(localized_format, bfo, verbose) errors_ = errors else: #.xsl # Fetch MARCXML. On-the-fly xm if we are now formatting in xm xml_record = record_get_xml(bfo.recID, 'xm', on_the_fly=(bfo.format != 'xm')) # Transform MARCXML using stylesheet evaluated_format = format(xml_record, template_source=format_content) return (evaluated_format, errors_) def eval_format_template_elements(format_template, bfo, verbose=0): """ Evalutes the format elements of the given template and replace each element with its value. Also returns errors. Prepare the format template content so that we can directly replace the marc code by their value. 
This implies: 1) Look for special tags 2) replace special tags by their evaluation @param format_template the format template code @param bfo the object containing parameters for the current formatting @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (result, errors) """ errors_ = [] # First define insert_element_code(match), used in re.sub() function def insert_element_code(match): """ Analyses 'match', interpret the corresponding code, and return the result of the evaluation. Called by substitution in 'eval_format_template_elements(...)' @param match a match object corresponding to the special tag that must be interpreted """ function_name = match.group("function_name") try: format_element = get_format_element(function_name, verbose) except Exception, e: if verbose >= 5: return '' + \ cgi.escape(str(e)).replace('\n', '
') + \ '
' if format_element is None: error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", function_name)], stream='error', ln=cdslang) errors_.append(error) if verbose >= 5: return '' + \ error[0][1]+'' else: params = {} # Look for function parameters given in format template code all_params = match.group('params') if all_params is not None: function_params_iterator = pattern_function_params.finditer(all_params) for param_match in function_params_iterator: name = param_match.group('param') value = param_match.group('value') params[name] = value # Evaluate element with params and return (Do not return errors) (result, errors) = eval_format_element(format_element, bfo, params, verbose) errors_.append(errors) return result # Substitute special tags in the format by our own text. # Special tags have the form format = pattern_tag.sub(insert_element_code, format_template) return (format, errors_) def eval_format_element(format_element, bfo, parameters={}, verbose=0): """ Returns the result of the evaluation of the given format element name, with given BibFormatObject and parameters. Also returns the errors of the evaluation. @param format_element a format element structure as returned by get_format_element @param bfo a BibFormatObject used for formatting @param parameters a dict of parameters to be used for formatting. Key is parameter and value is value of parameter @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (result, errors) """ errors = [] #Load special values given as parameters prefix = parameters.get('prefix', "") suffix = parameters.get('suffix', "") default_value = parameters.get('default', "") escape = parameters.get('escape', "") output_text = '' # 3 possible cases: # a) format element file is found: we execute it # b) format element file is not found, but exist in tag table (e.g. 
bfe_isbn) # c) format element is totally unknown. Do nothing or report error if format_element is not None and format_element['type'] == "python": # a) We found an element with the tag name, of type "python" # Prepare a dict 'params' to pass as parameter to 'format' # function of element params = {} # Look for parameters defined in format element # Fill them with specified default values and values # given as parameters for param in format_element['attrs']['params']: name = param['name'] default = param['default'] params[name] = parameters.get(name, default) # Add BibFormatObject params['bfo'] = bfo # Execute function with given parameters and return result. function = format_element['code'] try: output_text = apply(function, (), params) except Exception, e: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT", name, str(params)) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: tb = sys.exc_info()[2] error_string = get_msgs_for_code_list(error, stream='error', ln=cdslang) stack = traceback.format_exception(Exception, e, tb, limit=None) output_text = ''+ \ str(error_string[0][1]) + "".join(stack) +' ' # None can be returned when evaluating function if output_text is None: output_text = "" else: output_text = str(output_text) # Escaping: # (1) By default, everything is escaped in mode 1 # (2) If evaluated element has 'escape_values()' function, use # its returned value as escape mode, and override (1) # (3) If template has a defined parameter (in allowed values), # use it, and override (1) and (2) # (1) escape_mode = 1 # (2) escape_function = format_element['escape_function'] if escape_function is not None: try: escape_mode = apply(escape_function, (), {'bfo': bfo}) except Exception, e: error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT_ESCAPE", name) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: tb = sys.exc_info()[2] error_string = 
get_msgs_for_code_list(error, stream='error', ln=cdslang) output_text += ''+ \ str(error_string[0][1]) +' ' # (3) if escape in ['0', '1', '2', '3', '4']: escape_mode = int(escape) #If escape is equal to 1, then escape all # HTML reserved chars. if escape_mode > 0: output_text = escape_field(output_text, mode=escape_mode) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty if output_text.strip() != "": output_text = prefix + output_text + suffix # Add the default value if output_text is empty if output_text == "": output_text = default_value return (output_text, errors) elif format_element is not None and format_element['type'] == "field": # b) We have not found an element in files that has the tag # name. Then look for it in the table "tag" # # # # Load special values given as parameters separator = parameters.get('separator ', "") nbMax = parameters.get('nbMax', "") escape = parameters.get('escape', "1") # By default, escape here # Get the fields tags that have to be printed tags = format_element['attrs']['tags'] output_text = [] # Get values corresponding to tags for tag in tags: p_tag = parse_tag(tag) values = record_get_field_values(bfo.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if len(values)>0 and isinstance(values[0], dict): #flatten dict to its values only values_list = map(lambda x: x.values(), values) #output_text.extend(values) for values in values_list: output_text.extend(values) else: output_text.extend(values) if nbMax != "": try: nbMax = int(nbMax) output_text = output_text[:nbMax] except: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_NBMAX_NOT_INT", name) errors.append(error) if verbose < 5: register_errors(error, 'error') elif verbose >= 5: error_string = get_msgs_for_code_list(error, stream='error', ln=cdslang) output_text = output_text.append(error_string[0][1]) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is 
not empty. # If evaluation is empty string, return default value if it exists. # Else return empty string if ("".join(output_text)).strip() != "": # If escape is equal to 1, then escape all # HTML reserved chars. if escape == '1': output_text = cgi.escape(separator.join(output_text)) else: output_text = separator.join(output_text) output_text = prefix + output_text + suffix else: #Return default value output_text = default_value return (output_text, errors) else: # c) Element is unknown error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", format_element)], stream='error', ln=cdslang) errors.append(error) if verbose < 5: register_errors(error, 'error') return ("", errors) elif verbose >= 5: if verbose >= 9: sys.exit(error[0][1]) return ('' + \ error[0][1]+'', errors) def filter_languages(format_template, ln='en'): """ Filters the language tags that do not correspond to the specified language. @param format_template the format template code @param ln the language that is NOT filtered out from the template @return the format template with unnecessary languages filtered out """ # First define search_lang_tag(match) and clean_language_tag(match), used # in re.sub() function def search_lang_tag(match): """ Searches for the ... tag and remove inner localized tags such as , , that are not current_lang. If current_lang cannot be found inside ... , try to use 'cdslang' @param match a match object corresponding to the special tag that must be interpreted """ current_lang = ln def clean_language_tag(match): """ Return tag text content if tag language of match is output language. Called by substitution in 'filter_languages(...)' @param match a match object corresponding to the special tag that must be interpreted """ if match.group(1) == current_lang: return match.group(2) else: return "" # End of clean_language_tag lang_tag_content = match.group("langs") # Try to find tag with current lang. 
If it does not exists, # then current_lang becomes cdslang until the end of this # replace pattern_current_lang = re.compile(r"<("+current_lang+ \ r")\s*>(.*?)()", re.IGNORECASE | re.DOTALL) if re.search(pattern_current_lang, lang_tag_content) is None: current_lang = cdslang cleaned_lang_tag = ln_pattern.sub(clean_language_tag, lang_tag_content) return cleaned_lang_tag # End of search_lang_tag filtered_format_template = pattern_lang.sub(search_lang_tag, format_template) return filtered_format_template def get_format_template(filename, with_attributes=False): """ Returns the structured content of the given formate template. if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in dictionary (it might, if it has already been loaded previously) {'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} } @param filename the filename of an format template @param with_attributes if True, fetch the attributes (names and description) for format' @return strucured content of format template """ # Get from cache whenever possible global format_templates_cache if not filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) and \ not filename.endswith(".xsl"): return None if format_templates_cache.has_key(filename): # If we must return with attributes and template exist in # cache with attributes then return cache. 
# Else reload with attributes if with_attributes and \ format_templates_cache[filename].has_key('attrs'): return format_templates_cache[filename] format_template = {'code':""} try: path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename) format_file = open(path) format_content = format_file.read() format_file.close() # Load format template code # Remove name and description if filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION): code_and_description = pattern_format_template_name.sub("", format_content) code = pattern_format_template_desc.sub("", code_and_description) else: code = format_content format_template['code'] = code except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], stream='error', ln=cdslang) register_errors(errors, 'error') # Save attributes if necessary if with_attributes: format_template['attrs'] = get_format_template_attrs(filename) # Cache and return format_templates_cache[filename] = format_template return format_template def get_format_templates(with_attributes=False): """ Returns the list of all format templates, as dictionary with filenames as keys if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in each dictionary (it might, if it has already been loaded previously) [{'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} }, ... 
} @param with_attributes if True, fetch the attributes (names and description) for formats """ format_templates = {} files = os.listdir(CFG_BIBFORMAT_TEMPLATES_PATH) for filename in files: if filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION) or \ filename.endswith(".xsl"): format_templates[filename] = get_format_template(filename, with_attributes) return format_templates def get_format_template_attrs(filename): """ Returns the attributes of the format template with given filename The attributes are {'name', 'description'} Caution: the function does not check that path exists or that the format element is valid. @param the path to a format element """ attrs = {} attrs['name'] = "" attrs['description'] = "" try: template_file = open("%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename)) code = template_file.read() template_file.close() match = None if filename.endswith(".xsl"): # .xsl attrs['name'] = filename[:-4] else: # .bft match = pattern_format_template_name.search(code) if match is not None: attrs['name'] = match.group('name') else: attrs['name'] = filename match = pattern_format_template_desc.search(code) if match is not None: attrs['description'] = match.group('desc').rstrip('.') except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], stream='error', ln=cdslang) register_errors(errors, 'error') attrs['name'] = filename return attrs def get_format_element(element_name, verbose=0, with_built_in_params=False): """ Returns the format element structured content. Return None if element cannot be loaded (file not found, not readable or invalid) The returned structure is {'attrs': {some attributes in dict. 
See get_format_element_attrs_from_*} 'code': the_function_code, 'type':"field" or "python" depending if element is defined in file or table, 'escape_function': the function to call to know if element output must be escaped} @param element_name the name of the format element to load @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @param with_built_in_params if True, load the parameters built in all elements @return a dictionary with format element attributes """ # Get from cache whenever possible global format_elements_cache errors = [] # Resolve filename and prepare 'name' as key for the cache filename = resolve_format_element_filename(element_name) if filename is not None: name = filename.upper() else: name = element_name.upper() if format_elements_cache.has_key(name): element = format_elements_cache[name] if not with_built_in_params or \ (with_built_in_params and \ element['attrs'].has_key('builtin_params')): return element if filename is None: # Element is maybe in tag table if bibformat_dblayer.tag_exists_for_name(element_name): format_element = {'attrs': get_format_element_attrs_from_table( \ element_name, with_built_in_params), 'code':None, 'escape_function':None, 'type':"field"} # Cache and returns format_elements_cache[name] = format_element return format_element else: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND", element_name)], stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) return None else: format_element = {} module_name = filename if module_name.endswith(".py"): module_name = module_name[:-3] # Load element try: module = __import__(CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH + \ "." + module_name) # Load last module in import path # For eg. 
load bfe_name in # invenio.bibformat_elements.bfe_name # Used to keep flexibility regarding where elements # directory is (for eg. test cases) components = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH.split(".") for comp in components[1:]: module = getattr(module, comp) except Exception, e: # We catch all exceptions here, as we just want to print # traceback in all cases tb = sys.exc_info()[2] stack = traceback.format_exception(Exception, e, tb, limit=None) errors = get_msgs_for_code_list([("ERR_BIBFORMAT_IN_FORMAT_ELEMENT", element_name,"\n" + "\n".join(stack[-2:-1]))], stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if errors: if verbose >= 7: raise Exception, errors[0][1] return None # Load function 'format()' inside element try: function_format = module.__dict__[module_name].format format_element['code'] = function_format except AttributeError, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_FORMAT_FUNCTION", element_name)], stream='warning', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if errors: if verbose >= 7: raise Exception, errors[0][1] return None # Load function 'escape_values()' inside element function_escape = getattr(module.__dict__[module_name], 'escape_values', None) format_element['escape_function'] = function_escape # Prepare, cache and return format_element['attrs'] = get_format_element_attrs_from_function( \ function_format, element_name, with_built_in_params) format_element['type'] = "python" format_elements_cache[name] = format_element return format_element def get_format_elements(with_built_in_params=False): """ Returns the list of format elements attributes as dictionary structure Elements declared in files have priority over element declared in 'tag' table The returned object has this format: {element_name1: {'attrs': {'description':..., 'seealso':... 
def get_format_element_attrs_from_function(function, element_name,
                                           with_built_in_params=False):
    """
    Builds the attributes of a format element from its formatting
    function: the signature gives the parameters and their default
    values, the docstring gives the descriptions.

    The attributes are
    {'name' : "name of element" #element_name, uppercased, spaces replaced by '_'
     'description': "a string description of the element",
     'seealso' : ["element_1.py", "element_2.py", ...] #a list of related elements
     'params': [{'name':"param_name", #a list of parameters for this element (except 'bfo')
                 'default':"default value",
                 'description': "a description"}, ...],
     'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind
                         'default':"default value",
                         'description': "a description"}, ...],
    }

    'builtin_params' is only present when with_built_in_params is true.

    @param function the formatting function of a format element
    @param element_name the name of the element
    @param with_built_in_params if True, load the parameters built in all elements
    @return a dict of attributes, as described above
    """
    docstring = function.__doc__
    has_docstring = isinstance(docstring, str)

    attrs = {'description': "",
             'name': element_name.replace(" ", "_").upper(),
             'seealso': []}

    if has_docstring:
        # The description is whatever precedes the first @param / @see
        summary = docstring.split("@param")[0].split("@see")[0]
        attrs['description'] = summary.strip().rstrip('.')

        # Related elements are listed after @see, separated by commas
        see_match = pattern_format_element_seealso.search(docstring)
        if see_match is not None:
            related = see_match.group('see').rstrip('.').split(",")
            attrs['seealso'].extend([item.strip() for item in related])

    # Read the parameters from the function signature
    (args, varargs, varkw, defaults) = inspect.getargspec(function)

    # Defaults apply to the last arguments: walk both sequences from the
    # end so that an argument and its default share the same position
    reversed_args = args[::-1]
    if defaults is None:
        reversed_defaults = []
    else:
        reversed_defaults = list(defaults)[::-1]

    params = {}
    for position, arg in enumerate(reversed_args):
        if arg == "bfo":
            # Hidden to users, and present in all elements of this kind
            continue
        default = None
        if position < len(reversed_defaults):
            default = reversed_defaults[position]
        if default is None:
            # Prefer printing "" (nothing) than None in output
            default = ""
        params[arg] = {'name': arg,
                       'default': default,
                       'description': "(no description provided)"}

    if has_docstring:
        # Attach the @param descriptions to the parameters found above
        for param_match in pattern_format_element_params.finditer(docstring):
            param_name = param_match.group('name')
            if param_name in params:
                params[param_name]['description'] = \
                    param_match.group('desc').rstrip('.')

    attrs['params'] = params.values()

    if with_built_in_params:
        # (name, default, description) of the parameters built in all
        # elements defined by a function
        builtin_specs = (
            ("prefix", "",
             "A prefix printed only if the record has a value for this element"),
            ("suffix", "",
             "A suffix printed only if the record has a value for this element"),
            ("default", "",
             "A default value printed if the record has no value for this element"),
            ("escape", "",
             "If set to 1, replaces special characters '&', '<' and '>' of this element by SGML entities"),
            )
        attrs['builtin_params'] = [{'name': spec_name,
                                    'default': spec_default,
                                    'description': spec_description}
                                   for (spec_name, spec_default, spec_description)
                                   in builtin_specs]

    return attrs
def get_format_element_attrs_from_table(element_name,
                                        with_built_in_params=False):
    """
    Builds the attributes of the format element with given name, using
    the tags returned by bibformat_dblayer.get_tags_from_name().

    The attributes are
    {'name' : "name of element" #element_name, uppercased, spaces replaced by '_'
     'description': "a string description of the element",
     'seealso' : [] #a list of related elements. Always empty in this case
     'params': [], #a list of parameters for this element. Always empty in this case
     'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind
                         'default':"default value",
                         'description': "a description"}, ...],
     'tags':["950.1", 203.a] #the list of tags printed by this element
    }

    'builtin_params' is only present when with_built_in_params is true.

    @param element_name the name of an element in database
    @param with_built_in_params if True, load the parameters built in all elements
    @return a dict of attributes, as described above
    """
    tags = bibformat_dblayer.get_tags_from_name(element_name)

    # Singular or plural depending on the number of tags printed
    if len(tags) > 1:
        field_label = "fields"
    else:
        field_label = "field"

    attrs = {'description': "Prints %s %s of the record" % \
                            (field_label, ", ".join(tags)),
             'name': element_name.replace(" ", "_").upper(),
             'seealso': [],
             'params': [],
             'tags': tags}

    if with_built_in_params:
        # (name, default, description) of the parameters built in all
        # elements defined in the table
        builtin_specs = (
            ("prefix", "",
             "A prefix printed only if the record has a value for this element"),
            ("suffix", "",
             "A suffix printed only if the record has a value for this element"),
            ("separator", " ",
             "A separator between elements of the field"),
            ("nbMax", "",
             "The maximum number of values to print for this element. No limit if not specified"),
            ("default", "",
             "A default value printed if the record has no value for this element"),
            ("escape", "",
             "If set to 1, replaces special characters '&', '<' and '>' of this element by SGML entities"),
            )
        attrs['builtin_params'] = [{'name': spec_name,
                                    'default': spec_default,
                                    'description': spec_description}
                                   for (spec_name, spec_default, spec_description)
                                   in builtin_specs]

    return attrs
def get_output_format(code, with_attributes=False, verbose=0):
    """
    Returns the structured content of the given output format

    If 'with_attributes' is true, also returns the names and description
    of the output format, else 'attrs' is not returned in dict (it might,
    if it has already been loaded previously and is served from cache).

    If the output format corresponding to 'code' is not found, an error
    is registered and an empty structure is returned.

    See get_output_format_attrs() to learn more on the attributes

    {'rules': [ {'field': "980__a",
                 'value': "PREPRINT",
                 'template': "filename_a.bft",
                },
                {...}
              ],
     'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
               'description': "a description"
               'code': "fnm1",
               'content_type': "application/ms-excel",
               'visibility': 1
              }
     'default':"filename_b.bft"
    }

    The output format file is read line by line:
      - blank lines are ignored
      - a line ending with ':' sets the current field (everything after
        the first word of the line)
      - a line containing '---' adds a rule for the current field: the
        text after the last '---' is the template, what precedes is the
        value
      - any other line containing ':' sets the default template

    If the file cannot be read, an error is registered and what could be
    parsed so far is returned (and cached).

    @param code the code of an output_format
    @param with_attributes if True, fetch the attributes (names and description) for format
    @param verbose the level of verbosity, passed on to resolve_output_format_filename() and get_output_format_attrs()
    @return structured content of output format
    """
    output_format = {'rules': [], 'default': ""}

    filename = resolve_output_format_filename(code, verbose)
    if filename is None:
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN", code)],
                                        stream='error', ln=cdslang)
        register_errors(errors, 'error')
        if with_attributes: #Create empty attrs if asked for attributes
            output_format['attrs'] = get_output_format_attrs(code, verbose)
        return output_format

    # Get from cache whenever possible
    if filename in format_outputs_cache:
        cached_format = format_outputs_cache[filename]
        # If we must return with attributes but cache has no
        # attributes, then load attributes
        if with_attributes and 'attrs' not in cached_format:
            cached_format['attrs'] = get_output_format_attrs(code, verbose)
        return cached_format

    try:
        if with_attributes:
            output_format['attrs'] = get_output_format_attrs(code, verbose)

        path = "%s%s%s" % (CFG_BIBFORMAT_OUTPUTS_PATH, os.sep, filename)
        format_file = open(path)
        try:
            current_tag = ''
            for line in format_file:
                line = line.strip()
                if line == "":
                    # Ignore blank lines
                    continue
                if line.endswith(":"):
                    # Retrieve tag
                    # Remove : spaces and eol at the end of line
                    clean_line = line.rstrip(": \n\r")
                    # The tag starts at second position
                    current_tag = "".join(clean_line.split()[1:]).strip()
                elif line.find('---') != -1:
                    words = line.split('---')
                    template = words[-1].strip()
                    condition = ''.join(words[:-1])
                    output_format['rules'].append({'field': current_tag,
                                                   'value': condition,
                                                   'template': template,
                                                   })
                elif line.find(':') != -1:
                    # Default case
                    output_format['default'] = line.split(':')[1].strip()
        finally:
            # Always release the file, even if parsing failed
            format_file.close()
    except Exception:
        error = sys.exc_info()[1]
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE", filename, str(error))],
                                        stream='error', ln=cdslang)
        register_errors(errors, 'error')

    # Cache and return
    format_outputs_cache[filename] = output_format
    return output_format

def get_output_format_attrs(code, verbose=0):
    """
    Returns the attributes of an output format.

    The attributes contain 'code', which is the short identifier of the
    output format (to be given as parameter in format_record function to
    specify the output format), 'description', a description of the
    output format, 'visibility' the visibility of the format in the
    output format list on public pages and 'names', the localized names
    of the output format. If 'content_type' is specified then the
    search_engine will send a file with this content type and with
    result of formatting as content to the user. The 'names' dict always
    contains 'generic', 'ln' (for long name) and 'sn' (for short names)
    keys. 'generic' is the default name for output format. 'ln' and 'sn'
    contain long and short localized names of the output format. Only
    the languages for which a localization exist are used.

    {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
     'description': "a description"
     'code': "fnm1",
     'content_type': "application/ms-excel",
     'visibility': 1
    }

    If no file can be resolved for 'code', the attributes are returned
    with empty names, description and content type, and visibility 1.

    @param code the short identifier of the format (with or without the output format extension)
    @param verbose the level of verbosity, passed on to resolve_output_format_filename()
    @return structured content of output format attributes
    """
    # Accept a filename as well as a code: drop the extension if any
    extension_suffix = "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    if code.endswith(extension_suffix):
        code = code[:-len(extension_suffix)]

    attrs = {'names': {'generic': "", 'ln': {}, 'sn': {}},
             'description': '',
             'code': code.upper(),
             'content_type': "",
             'visibility': 1}

    if resolve_output_format_filename(code, verbose) is None:
        # Unknown output format: keep the empty attributes
        return attrs

    attrs['names'] = bibformat_dblayer.get_output_format_names(code)
    attrs['description'] = bibformat_dblayer.get_output_format_description(code)
    attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code)
    attrs['visibility'] = bibformat_dblayer.get_output_format_visibility(code)

    return attrs
def get_output_formats(with_attributes=False):
    """
    Returns all the output formats found in CFG_BIBFORMAT_OUTPUTS_PATH,
    as a dictionary with their filename as key

    If 'with_attributes' is true, also returns the names and description
    of the output formats, else 'attrs' is not returned in dicts (it
    might, if it has already been loaded previously).

    See get_output_format_attrs() to learn more on the attributes

    {'filename_1.bfo': {'rules': [ {'field': "980__a",
                                    'value': "PREPRINT",
                                    'template': "filename_a.bft",
                                   },
                                   {...}
                                 ],
                        'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
                                  'description': "a description"
                                  'code': "fnm1"
                                 }
                        'default':"filename_b.bft"
                       },
     'filename_2.bfo': {...},
     ...
    }

    @param with_attributes if True, fetch the attributes of each output format
    @return the dict of output formats
    """
    extension_suffix = "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    output_formats = {}

    for filename in os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH):
        if not filename.endswith(extension_suffix):
            # Not an output format file
            continue
        # The code is the filename without its last dot-separated part
        name_parts = filename.split(".")
        code = "".join(name_parts[:-1])
        output_formats[filename] = get_output_format(code, with_attributes)

    return output_formats
def get_kb_mapping(kb, string, default=""):
    """
    Returns the value of 'string' in the knowledge base 'kb'.

    The value is fetched with bibformat_dblayer.get_kb_mapping_value()
    the first time, then served from kb_mappings_cache. A value of None
    is cached too, so that the lookup is not repeated, and makes the
    function return 'default'.

    The same key ('string' itself) is used to read and to write the
    cache, so that a value that has been stored is always found again.

    @param kb a knowledge base name
    @param string a key in a knowledge base
    @param default a default value if 'string' is not in 'kb'
    @return the value corresponding to the given string in given kb
    """
    kb_cache = kb_mappings_cache.get(kb)
    if kb_cache is None:
        # Precreate for caching this kb
        kb_cache = {}
        kb_mappings_cache[kb] = kb_cache

    if string in kb_cache:
        value = kb_cache[string]
    else:
        value = bibformat_dblayer.get_kb_mapping_value(kb, string)
        kb_cache[string] = value

    if value is None:
        return default
    return value

def resolve_format_element_filename(string):
    """
    Returns the filename of the element corresponding to string

    This is necessary since format templates code call elements by
    ignoring case. Spaces and underscores are considered equivalent.
    It is also recommended that format elements filenames are prefixed
    with bfe_ : a file matches if its name equals the requested name
    with or without the "BFE_" prefix, on either side.

    @param string a name for a format element (with or without '.py')
    @return the corresponding filename, with right case, or None if not found
    """
    wanted = string.replace(" ", "_").upper()
    if not string.endswith(".py"):
        wanted += ".PY"

    for filename in os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH):
        candidate = filename.replace(" ", "_").upper()
        if wanted in (candidate, "BFE_" + candidate) or \
               candidate == "BFE_" + wanted:
            return filename

    # No element with that name found
    # Do not log error, as it might be a normal execution case:
    # element can be in database
    return None
def resolve_output_format_filename(code, verbose=0):
    """
    Returns the filename of output corresponding to code

    This is necessary since output formats names are not case sensitive
    but most file systems are.

    If no file matches, an error is built and, depending on 'verbose':
    registered (0), written to stderr (>= 5), and the program is exited
    (>= 9).

    @param code the code for an output format
    @param verbose the level of verbosity from 0 to 9
    @return the corresponding filename, with right case, or None if not found
    """
    extension_suffix = "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION

    #Remove non alphanumeric chars (except .)
    code = re.sub(r"[^.0-9a-zA-Z]", "", code)
    if not code.endswith(extension_suffix):
        # No extension given: dots are not kept either
        code = re.sub(r"\W", "", code)
        code += extension_suffix

    wanted = code.upper()
    for filename in os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH):
        if filename.upper() == wanted:
            return filename

    # No output format with that name found
    errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME", code)],
                                    stream='error', ln=cdslang)
    if verbose == 0:
        register_errors(errors, 'error')
    elif verbose >= 5:
        sys.stderr.write(errors[0][1])
        if verbose >= 9:
            sys.exit(errors[0][1])
    return None

def get_fresh_format_template_filename(name):
    """
    Returns a new filename and name for template with given name.

    Used when writing a new template to a file, so that the name has no
    space, is unique in template directory

    The filename only contains alphanumeric chars and dots. When a file
    with that name already exists in CFG_BIBFORMAT_TEMPLATES_PATH, an
    index (2, 3, ...) is appended to the filename and to the returned
    name until an unused filename is found. The indexed filenames are
    built from the same cleaned base as the first candidate.

    Returns (unique_filename, modified_name)

    @param name a name for a format template
    @return the corresponding filename, and modified name if necessary
    """
    extension_suffix = "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION
    directory = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep

    name = name.replace(" ", "_")
    # Remove non alphanumeric chars (except .)
    base_filename = re.sub(r"[^.0-9a-zA-Z]", "", name)

    filename = base_filename
    index = 1
    while os.path.exists(directory + filename + extension_suffix):
        index += 1
        # Keep using the cleaned base: the raw name may hold characters
        # that must not end up in a filename
        filename = base_filename + str(index)

    if index > 1:
        returned_name = (name + str(index)).replace("_", " ")
    else:
        returned_name = name.replace("_", " ")

    return (filename + extension_suffix, returned_name)
def get_fresh_output_format_filename(code):
    """
    Returns a new filename for output format with given code.

    Used when writing a new output format to a file, so that the code
    has no space, is unique in output format directory. The filename
    also need to be at most 6 chars long, as the convention is that
    filename == output format code (+ .extension)

    The code is uppercased, stripped from chars that are neither
    alphanumeric nor dots, and cut to 6 chars. While a file with the
    resulting name exists in CFG_BIBFORMAT_OUTPUTS_PATH, an index
    (2, 3, ...) is appended, replacing the end of the code when the
    result would be longer than 6 chars. The program is exited if the
    index reaches 99999.

    Returns (unique_filename, modified_code)

    @param code the code of an output format
    @return the corresponding filename, and modified code if necessary
    """
    extension_suffix = "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION
    directory = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep

    # Remove non alphanumeric chars (except .), keep 6 chars at most
    code = re.sub(r"[^.0-9a-zA-Z]", "", code.upper().replace(" ", "_"))[:6]

    filename = code
    index = 2
    while os.path.exists(directory + filename + extension_suffix):
        suffix = str(index)
        filename = code + suffix
        if len(filename) > 6:
            # Make room for the index at the end of the code
            filename = code[:-len(suffix)] + suffix
        index += 1

        # We should not try more than 99999... Sanity check.
        if index >= 99999:
            errors = get_msgs_for_code_list([("ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED", code)],
                                            stream='error', ln=cdslang)
            register_errors(errors, 'error')
            sys.exit("Output format cannot be named as %s"%code)

    return (filename + extension_suffix, filename)
def clear_caches():
    """
    Resets the module caches: each of the four cache names (format
    templates, format elements, output formats and knowledge base
    mappings) is rebound to a new empty dictionary.
    """
    global format_templates_cache, format_elements_cache
    global format_outputs_cache, kb_mappings_cache

    # One distinct dictionary per cache
    format_templates_cache, format_elements_cache = {}, {}
    format_outputs_cache, kb_mappings_cache = {}, {}
uid = None # The format in which the record is being formatted format = '' # The mod_python request object req = None def __init__(self, recID, ln=cdslang, search_pattern=[], xml_record=None, uid=None, format='', req=None): """ Creates a new bibformat object, with given record. You can either specify an record ID to format, or give its xml representation. if 'xml_record' is not None, use 'xml_record' instead of recID for the record. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. @param recID the id of a record @param ln the language in which the record has to be formatted @param search_pattern list of string representing the request used by the user in web interface @param xml_record a xml string of the record to format @param uid the user id of the person who will view the formatted page @param format the format used for formatting this record """ if xml_record is not None: # If record is given as parameter self.record = create_record(xml_record)[0] + # raise repr(create_record(xml_record.decode('utf-8').encode('utf-8'))) recID = record_get_field_value(self.record,"001") self.lang = wash_language(ln) self.search_pattern = search_pattern self.recID = recID self.uid = uid self.format = format self.req = req def get_record(self): """ Returns the record of this BibFormatObject instance @return the record structure as returned by BibRecord """ # Create record if necessary if self.record is None: # on-the-fly creation if current output is xm record = create_record(record_get_xml(self.recID, 'xm', on_the_fly=(self.format.lower() == 'xm'))) self.record = record[0] return self.record def control_field(self, tag, escape=0): """ Returns the value of control field given by tag in record @param tag the marc code of a field @param escape 1 if returned value should be escaped. Else 0. 
@return value of field tag in record """ if self.get_record() is None: #Case where BibRecord could not parse object return '' p_tag = parse_tag(tag) field_value = record_get_field_value(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if escape == 0: return field_value else: return escape_field(field_value, escape) def field(self, tag, escape=0): """ Returns the value of the field corresponding to tag in the current record. If the value does not exist, return empty string 'escape' parameter allows to escape special characters of the field. The value of escape can be: 0 - no escaping 1 - escape all HTML characters 2 - escape all HTML characters by default. If field starts with , escape only unsafe characters, but leave basic HTML tags. @param tag the marc code of a field @param escape 1 if returned value should be escaped. Else 0. (see above for other modes) @return value of field tag in record """ list_of_fields = self.fields(tag) if len(list_of_fields) > 0: # Escaping below if escape == 0: return list_of_fields[0] else: return escape_field(list_of_fields[0], escape) else: return "" def fields(self, tag, escape=0, repeatable_subfields_p=False): """ Returns the list of values corresonding to "tag". If tag has an undefined subcode (such as 999C5), the function returns a list of dictionaries, whoose keys are the subcodes and the values are the values of tag.subcode. If the tag has a subcode, simply returns list of values corresponding to tag. Eg. for given MARC: 999C5 $a value_1a $b value_1b 999C5 $b value_2b 999C5 $b value_3b $b value_3b_bis >> bfo.fields('999C5b') >> ['value_1b', 'value_2b', 'value_3b', 'value_3b_bis'] >> bfo.fields('999C5') >> [{'a':'value_1a', 'b':'value_1b'}, {'b':'value_2b'}, {'b':'value_3b'}] By default the function returns only one value for each subfield (that is it considers that repeatable subfields are not allowed). It is why in the above example 'value3b_bis' is not shown for bfo.fields('999C5'). 
(Note that it is not defined which of value_3b or value_3b_bis is returned). This is to simplify the use of the function, as most of the time subfields are not repeatable (in that way we get a string instead of a list). You can allow repeatable subfields by setting 'repeatable_subfields_p' parameter to True. In this mode, the above example would return: >> bfo.fields('999C5b', repeatable_subfields_p=True) >> ['value_1b', 'value_2b', 'value_3b'] >> bfo.fields('999C5', repeatable_subfields_p=True) >> [{'a':['value_1a'], 'b':['value_1b']}, {'b':['value_2b']}, {'b':['value_3b', 'value3b_bis']}] NOTICE THAT THE RETURNED STRUCTURE IS DIFFERENT. Also note that whatever the value of 'repeatable_subfields_p' is, bfo.fields('999C5b') always show all fields, even repeatable ones. This is because the parameter has no impact on the returned structure (it is always a list). 'escape' parameter allows to escape special characters of the fields. The value of escape can be: 0 - no escaping 1 - escape all HTML characters 2 - escape all dangerous HTML tags. 3 - Mix of mode 1 and 2. If value of field starts with , then use mode 2. Else use mode 1. 4 - Remove all HTML tags @param tag the marc code of a field @param escape 1 if returned values should be escaped. Else 0. @repeatable_subfields_p if True, returns the list of subfields in the dictionary @return values of field tag in record """ if self.get_record() is None: # Case where BibRecord could not parse object return [] p_tag = parse_tag(tag) if p_tag[3] != "": # Subcode has been defined. Simply returns list of values values = record_get_field_values(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if escape == 0: return values else: return [escape_field(value, escape) for value in values] else: # Subcode is undefined. Returns list of dicts. # However it might be the case of a control field. 
instances = record_get_field_instances(self.get_record(), p_tag[0], p_tag[1], p_tag[2]) if repeatable_subfields_p: list_of_instances = [] for instance in instances: instance_dict = {} for subfield in instance[0]: if not instance_dict.has_key(subfield[0]): instance_dict[subfield[0]] = [] if escape == 0: instance_dict[subfield[0]].append(subfield[1]) else: instance_dict[subfield[0]].append(escape_field(subfield[1], escape)) list_of_instances.append(instance_dict) return list_of_instances else: if escape == 0: return [dict(instance[0]) for instance in instances] else: return [dict([ (subfield[0], escape_field(subfield[1], escape)) \ for subfield in instance[0] ]) \ for instance in instances] def kb(self, kb, string, default=""): """ Returns the value of the "string" in the knowledge base "kb". If kb does not exist or string does not exist in kb, returns 'default' string or empty string if not specified. @param kb a knowledge base name @param string the string we want to translate @param default a default value returned if 'string' not found in 'kb' """ if string is None: return default val = get_kb_mapping(kb, string, default) if val is None: return default else: return val def escape_field(value, mode=0): """ Utility function used to escape the value of a field in given mode. - mode 0: no escaping - mode 1: escaping all HTML/XML characters (escaped chars are shown as escaped) - mode 2: escaping dangerous HTML tags to avoid XSS, but keep basic one (such as
) Escaped characters are removed. - mode 3: mix of mode 1 and mode 2. If field_value starts with , then use mode 2. Else use mode 1. - mode 4: escaping all HTML/XML tags (escaped tags are removed) - """ if mode == 1: return cgi.escape(value) elif mode == 2: return washer.wash(value, allowed_attribute_whitelist=['href', 'name', 'class'] ) elif mode == 3: if value.lstrip(' \n').startswith(html_field): return washer.wash(value, allowed_attribute_whitelist=['href', 'name', 'class'] ) else: return cgi.escape(value) elif mode == 4: return washer.wash(value, allowed_attribute_whitelist=[], allowed_tag_whitelist=[] ) else: return value def bf_profile(): """ Runs a benchmark """ for i in range(1, 51): format_record(i, "HD", ln=cdslang, verbose=9, search_pattern=[]) return if __name__ == "__main__": import profile import pstats #bf_profile() profile.run('bf_profile()', "bibformat_profile") p = pstats.Stats("bibformat_profile") p.strip_dirs().sort_stats("cumulative").print_stats() diff --git a/modules/Makefile.am b/modules/webjournal/Makefile.am similarity index 78% copy from modules/Makefile.am copy to modules/webjournal/Makefile.am index 1dd7a6696..951f5c8c2 100644 --- a/modules/Makefile.am +++ b/modules/webjournal/Makefile.am @@ -1,22 +1,22 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment +SUBDIRS = lib CLEANFILES = *~ diff --git a/modules/Makefile.am b/modules/webjournal/doc/Makefile.am similarity index 76% copy from modules/Makefile.am copy to modules/webjournal/doc/Makefile.am index 1dd7a6696..4de688592 100644 --- a/modules/Makefile.am +++ b/modules/webjournal/doc/Makefile.am @@ -1,22 +1,20 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
-SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment - -CLEANFILES = *~ +SUBDIRS = admin hacking diff --git a/modules/Makefile.am b/modules/webjournal/doc/admin/Makefile.am similarity index 76% copy from modules/Makefile.am copy to modules/webjournal/doc/admin/Makefile.am index 1dd7a6696..bb2e6c873 100644 --- a/modules/Makefile.am +++ b/modules/webjournal/doc/admin/Makefile.am @@ -1,22 +1,20 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
-SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment - -CLEANFILES = *~ +docdir = $(localstatedir)/www/admin/webjournal diff --git a/modules/Makefile.am b/modules/webjournal/doc/hacking/Makefile.am similarity index 76% copy from modules/Makefile.am copy to modules/webjournal/doc/hacking/Makefile.am index 1dd7a6696..a1018a757 100644 --- a/modules/Makefile.am +++ b/modules/webjournal/doc/hacking/Makefile.am @@ -1,22 +1,20 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
-SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment - -CLEANFILES = *~ +docdir = $(localstatedir)/www/hacking/webjournal diff --git a/modules/Makefile.am b/modules/webjournal/lib/Makefile.am similarity index 71% copy from modules/Makefile.am copy to modules/webjournal/lib/Makefile.am index 1dd7a6696..fb9f1385a 100644 --- a/modules/Makefile.am +++ b/modules/webjournal/lib/Makefile.am @@ -1,22 +1,33 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
-SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment +SUBDIRS = widgets -CLEANFILES = *~ +pylibdir = $(libdir)/python/invenio + +formatdir = $(pylibdir)/bibformat_elements + +pylib_DATA = webjournal_webinterface.py webjournal_utils.py webjournal_config.py +format_DATA = bfe_ObjectLanguage.py + +#noinst_DATA = webbasket_migration_kit.py + +EXTRA_DIST = $(pylib_DATA) #$(noinst_DATA) + +CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/webjournal/lib/bfe_ObjectLanguage.py b/modules/webjournal/lib/bfe_ObjectLanguage.py new file mode 100644 index 000000000..b08163ff9 --- /dev/null +++ b/modules/webjournal/lib/bfe_ObjectLanguage.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python + +#from invenio.messages import gettext_set_language + +def format(bfo): + """ + just returns the language string of the object. + """ + return bfo.lang + +def escape_values(bfo): + """ + """ + return 0 \ No newline at end of file diff --git a/modules/webjournal/lib/webjournal_config.py b/modules/webjournal/lib/webjournal_config.py new file mode 100644 index 000000000..8869ebce6 --- /dev/null +++ b/modules/webjournal/lib/webjournal_config.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +from invenio.config import adminemail, supportemail, etcdir, weburl, cdslang +from invenio.messages import gettext_set_language +from invenio.webpage import page +from invenio.webjournal_utils import parse_url_string +import os + +class InvenioWebJournalNoIndexTemplateError(Exception): + """Exception if no index template is specified in the config.""" + def __init__(self, journal_name): + """Initialisation.""" + self.journal = journal_name + def __str__(self): + """String representation.""" + return 'Admin did not provide a template for the index page of journal: %s. 
\ + The path to such a file should be given in the config.xml of this journal \ + under the tag ...' % repr(self.journal) + +class InvenioWebJournalNoArticleRuleError(Exception): + """ + Exception if there are no article type rules defined. + """ + def __init__(self, journal_name): + """ + Initialisation. + """ + self.journal = journal_name + def __str__(self): + """ + String representation. + """ + return 'The config.xml file for journal: %s does not contain any article \ + rules. These rules are needed to associate collections from your Invenio \ + installation to navigable article types. A rule should have the form of \ + NameOfArticleType, marc_tag:ExpectedContentOfMarcTag' % repr(self.journal) + +class InvenioWebJournalNoIssueNumberTagError(Exception): + """ + Exception if there is no marc tag for issue number defined. + """ + def __init__(self, journal_name): + """ + Initialisation. + """ + self.journal = journal_name + def __str__(self): + """ + String representation. + """ + return 'The config.xml file for journal: %s does not contain a marc tag \ + to deduce the issue number from. WebJournal is an issue number based \ + system, meaning you have to give some form of numbering system in a \ + dedicated marc tag, so the system can see which is the active journal \ + publication of the date.' % repr(self.journal) + +class InvenioWebJournalNoArticleNumberError(Exception): + """ + Exception if an article was called without its order number. + """ + def __init__(self, journal_name): + """ + Initialisation. + """ + self.journal = journal_name + + def __str__(self): + """ + String representation. + """ + return 'In Journal %s an article was called without specifying the order \ + of this article in the issue. This parameter is mandatory and should be \ + provided by internal links in any case. Maybe this was a bad direct url \ + hack. Check where the request came from.' 
% repr(self.journal) + +class InvenioWebJournalNoArticleTemplateError(Exception): + """ + Exception if an article was called without its order number. + """ + def __init__(self, journal_name): + """ + Initialisation. + """ + self.journal = journal_name + + def __str__(self): + """ + String representation. + """ + return 'Admin did not provide a template for the article view page of journal: %s. \ + The path to such a file should be given in the config.xml of this journal \ + under the tag ...' % repr(self.journal) + +def webjournal_missing_info_box(req, title, msg_title, msg): + """ + returns a box indicating that the given journal was not found on the + server, leaving the opportunity to select an existing journal from a list. + """ + params = parse_url_string(req) + try: + language = params["ln"] + except: + language = cdslang + _ = gettext_set_language(language) + title = _(title) + box_title = _(msg_title) + box_text = _(msg) + box_list_title = _("Available Journals") + find_journals = lambda path: [entry for entry in os.listdir(str(path)) if os.path.isdir(str(path)+str(entry))] + all_journals = find_journals('%s/webjournal/' % etcdir) + + box = '''
+
+ %s +

%s

+

%s

+
    + %s +
+
+ +
+
+ ''' % (weburl, + box_title, + box_text, + box_list_title, + "".join(['
  • %s
  • ' % (weburl, journal, journal) for journal in all_journals]), + adminemail) + return page(title=title, body=box) + +def webjournal_error_box(req, title, title_msg, msg): + """ + """ + params = parse_url_string(req) + try: + language = params["ln"] + except: + language = cdslang + _ = gettext_set_language(language) + title = _(title) + title_msg = _(title_msg) + msg = _(msg) + box = '''
    +
    + %s +

    %s

    +
    + +
    +
    + ''' % (weburl, title_msg, msg, supportemail) + return page(title=title, body=box) + diff --git a/modules/webjournal/lib/webjournal_utils.py b/modules/webjournal/lib/webjournal_utils.py new file mode 100644 index 000000000..0d6203e6f --- /dev/null +++ b/modules/webjournal/lib/webjournal_utils.py @@ -0,0 +1,540 @@ +#!/usr/bin/env python + +from invenio.bibformat_engine import BibFormatObject +from invenio.errorlib import register_exception +from invenio.search_engine import search_pattern +from invenio.config import etcdir, weburl, adminemail +from invenio.messages import gettext_set_language +from invenio.webpage import page +from invenio.dbquery import run_sql +from xml.dom import minidom +from urllib2 import urlopen +import time +import re + +def get_order_dict_from_recid_list(list, issue_number): + """ + this is a centralized function that takes a list of recid's and brings it in + order using a centralized algorithm. this always has to be in sync with + the reverse function get_recid_from_order(order) + + parameters: + list: a list of all recid's that should be brought into order + issue_number: the issue_number for which we are deriving the order + (this has to be one number) + + returns: + ordered_records: a dictionary with the recids ordered by keys + """ + ordered_records = {} + for record in list: + temp_rec = BibFormatObject(record) + issue_numbers = temp_rec.fields('773__n') + order_number = temp_rec.fields('773__c') + # todo: the marc fields have to be set'able by some sort of config interface + n = 0 + for temp_issue in issue_numbers: + if temp_issue == issue_number: + try: + order_number = int(order_number[n]) + except: + # todo: Warning, record does not support numbering scheme + order_number = -1 + n+=1 + if order_number != -1: + try: + ordered_records[order_number] = record + except: + pass + # todo: Error, there are two records with the same order_number in the issue + else: + ordered_records[max(ordered_records.keys()) + 1] = record + + 
return ordered_records + +def get_order_dict_from_recid_list_CERNBulletin(list, issue_number): + """ + special derivative of the get_order_dict_from_recid_list function that + extends the behavior insofar as too return a dictionary in which every + entry is a dict (there can be several number 1 articles) and every dict entry + is a tuple with an additional boolean to indicate if there is a graphical "new" + flag. the dict key on the second level is the upload time in epoch seconds. + e.g. + {1:{10349:(rec, true), 24792:(rec, false)}, 2:{736424:(rec,false)}, 24791:{1:(rec:false}} + the ordering inside an order number is given by upload date. so it is an ordering + 1-level -> number + 2-level -> date + """ + ordered_records = {} + for record in list: + temp_rec = BibFormatObject(record) + issue_numbers = temp_rec.fields('773__n') + order_number = temp_rec.fields('773__c') + try: +# upload_date = run_sql("SELECT modification_date FROM bibrec WHERE id=%s", (record, ))[0][0] + upload_date = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (record, ))[0][0] + except: + pass + #return repr(time.mktime(upload_date.timetuple())) + # todo: the marc fields have to be set'able by some sort of config interface + n = 0 + for temp_issue in issue_numbers: + if temp_issue == issue_number: + try: + order_number = int(order_number[n]) + except: + # todo: Warning, record does not support numbering scheme + order_number = -1 + n+=1 + if order_number != -1: + try: + if ordered_records.has_key(order_number): + ordered_records[order_number][int(time.mktime(upload_date.timetuple()))] = (record, True) + else: + ordered_records[order_number] = {int(time.mktime(upload_date.timetuple())):(record, False)} + except: + pass + # todo: Error, there are two records with the same order_number in the issue + else: + ordered_records[max(ordered_records.keys()) + 1] = record + + return ordered_records + +def get_records_in_same_issue_in_order(recid): + """ + """ + raise ("Not implemented yet.") + 
+def get_recid_from_order(order, rule, issue_number): + """ + takes the order of a record in the journal as passed in the url arguments + and derives the recid using the current issue number and the record + rule for this kind of records. + + parameters: + order: the order at which the record appears in the journal as passed + in the url + rule: the defining rule of the journal record category + issue_number: the issue number for which we are searching + + returns: + recid: the recid of the ordered record + """ + # get the id list + all_records = list(search_pattern(p="%s and 773__n:%s" % + (rule, issue_number), + f="&action_search=Search")) + ordered_records = {} + for record in all_records: + temp_rec = BibFormatObject(record) + issue_numbers = temp_rec.fields('773__n') + order_number = temp_rec.fields('773__c') + # todo: fields for issue number and order number have to become generic + n = 0 + for temp_issue in issue_numbers: + if temp_issue == issue_number: + try: + order_number = int(order_number[n]) + except: + # todo: Warning, record does not support numbering scheme + order_number = -1 + n+=1 + + if order_number != -1: + try: + ordered_records[order_number] = record + except: + pass + # todo: Error, there are two records with the same order_number in the issue + else: + ordered_records[max(ordered_records.keys()) + 1] = record + try: + recid = ordered_records[int(order)] + except: + pass + # todo: ERROR, numbering scheme inconsistency + return recid + +def get_recid_from_order_CERNBulletin(order, rule, issue_number): + """ + same functionality as get_recid_from_order above, but extends it for + the CERN Bulletin in a way so multiple entries for the first article are + possible. 
+ + parameters: + order: the order at which the record appears in the journal as passed + in the url + rule: the defining rule of the journal record category + issue_number: the issue number for which we are searching + + returns: + recid: the recid of the ordered record + """ + # get the id list + all_records = list(search_pattern(p="%s and 773__n:%s" % + (rule, issue_number), + f="&action_search=Search")) + ordered_records = {} + new_addition_records = [] + for record in all_records: + temp_rec = BibFormatObject(record) + issue_numbers = temp_rec.fields('773__n') + order_number = temp_rec.fields('773__c') + # todo: fields for issue number and order number have to become generic + n = 0 + for temp_issue in issue_numbers: + if temp_issue == issue_number: + try: + order_number = int(order_number[n]) + except: + register_exception(stream="warning", verbose_description="There \ + was an article in the journal that does not support \ + a numbering scheme") + order_number = -1000 + n+=1 + if order_number == -1000: + ordered_records[max(ordered_records.keys()) + 1] = record + elif order_number <= 1: + new_addition_records.append(record) + else: + try: + ordered_records[order_number] = record + except: + register_exception(stream='warning', verbose_description="There \ + were double entries for an order in this journal.") + + # process the CERN Bulletin specific new additions + if len(new_addition_records) > 1 and int(order) <= 1: + # if we are dealing with a new addition (order number smaller 1) + ordered_new_additions = {} + for record in new_addition_records: + #upload_date = run_sql("SELECT modification_date FROM bibrec WHERE id=%s", (record, ))[0][0] + upload_date = run_sql("SELECT creation_date FROM bibrec WHERE id=%s", (record, ))[0][0] + ordered_new_additions[int(time.mktime(upload_date.timetuple()))] = record + i = 1 + while len(ordered_new_additions) > 0: + temp_key = pop_oldest_article_CERNBulletin(ordered_new_additions) + record = 
ordered_new_additions.pop(int(temp_key)) + ordered_records[i] = record + i -=1 + else: + # if we have only one record on 1 just push it through + ordered_records[1] = new_addition_records[0] + try: + recid = ordered_records[int(order)] + except: + register_exception() + return recid + +def pop_newest_article_CERNBulletin(news_article_dict): + """ + pop key of the most recent article (highest c-timestamp) + """ + keys = news_article_dict.keys() + keys.sort() + key = keys[len(keys)-1] + return key + +def pop_oldest_article_CERNBulletin(news_article_dict): + """ + pop key of the oldest article (lowest c-timestamp) + """ + keys = news_article_dict.keys() + keys.sort() + key = keys[0] + return key + +def parse_url_string(req): + """ + centralized function to parse any url string given in webjournal. + + returns: + args: all arguments in dict form + """ + args = {} + # first get what you can from the argument string + try: + argument_string = req.args#"name=CERNBulletin&issue=22/2007"#req.args + except: + argument_string = "" + try: + arg_list = argument_string.split("&") + except: + # no arguments + arg_list = [] + for entry in arg_list: + try: + key = entry.split("=")[0] + except KeyError: + # todo: WARNING, could not parse one argument + continue + try: + val = entry.split("=")[1] + except: + # todo: WARNING, could not parse one argument + continue + try: + args[key] = val + except: + # todo: WARNING, argument given twice + continue + + # secondly try to get default arguments + try: + for entry in req.journal_defaults.keys(): + try: + args[entry] = req.journal_defaults[entry] + except: + # todo: Error, duplicate entry from args and defaults + pass + except: + # no defaults + pass + return args + +def get_xml_from_config(xpath_list, journal_name): + """ + wrapper for minidom.getElementsByTagName() + Takes a list of string expressions and a journal name and searches the config + file of this journal for the given xpath queries. 
Returns a dictionary with + a key for each query and a list of string (innerXml) results for each key. + Has a special field "config_fetching_error" that returns an error when + something has gone wrong. + """ + # get and open the config file + results = {} + config_path = '%s/webjournal/%s/config.xml' % (etcdir, journal_name) + config_file = minidom.Document + try: + config_file = minidom.parse("%s" % config_path) + except: + #todo: raise exception "error: no config file found" + results["config_fetching_error"] = "could not find config file" + return results + for xpath in xpath_list: + result_list = config_file.getElementsByTagName(xpath) + results[xpath] = [] + for result in result_list: + try: + result_string = result.firstChild.toxml() + except: + # WARNING, config did not have a value + continue + results[xpath].append(result_string) + return results + +def please_login(req, journal_name, ln="en", title="", message="", backlink=""): + """ + """ + _ = gettext_set_language(ln) + if title == "": + title_out = _("Please login to perform this action.") + else: + title_out = title + if message == "": + message_out = _("In order to publish webjournal issues you must be logged \ + in and be registered by your system administrator for \ + this kind of task. If you have a login, use the link \ + below to login.") + else: + message_out = message + + if backlink == "": + backlink_out = "%s/journal/issue_control?name=%s" % (weburl, journal_name) + else: + backlink_out = backlink + + title_msg = _("We need you to login") + body_out = '''
    +
    + %s +

    %s

    +
    +

    Login

    +
    + +
    +
    + ''' % (weburl, + title_msg, + message_out, + weburl, + backlink_out, + adminemail) + + return page(title = title_out, + body = body_out, + description = "", + keywords = "", + language = ln, + req = req) + +def get_current_issue(journal_name): + """ + checks the flat files for issue numbers of this journal and returns + the most recent issue number. + """ + try: + current_issue = open('%s/webjournal/%s/current_issue' % (etcdir, + journal_name)).read() + except: + #todo: Error, no current issue number, returning this week + return '%s/%s' (time.strptime("%U/%Y", time.localtime())) + issue_number = current_issue.split(" - ")[0].replace(" ", "") + return issue_number + +def cache_page(): + """ + """ + pass + +def get_rule_string_from_rule_list(rule_list, category): + """ + """ + i = 0 + current_category_in_list = 0 + for rule_string in rule_list: + category_from_config = rule_string.split(",")[0] + if category_from_config.lower() == category.lower(): + current_category_in_list = i + i+=1 + try: + rule_string = rule_list[current_category_in_list] + except: + rule_string = "" + # todo: exception + return rule_string + +def get_category_from_rule_string(rule_string): + """ + """ + pass + +def get_rule_string_from_category(category): + """ + """ + pass + +def get_monday_of_the_week(week_number, year): + """ + CERN Bulletin specific function that returns a string indicating the + Monday of each week as: Monday
    + """ + timetuple = time.strptime('1-%s-%s' % (week_number, year), "%w-%W-%Y") + return time.strftime("%A %d %B %Y", timetuple) + +def createhtmlmail (html, text, subject): + """Create a mime-message that will render HTML in popular + MUAs, text in better ones""" + import MimeWriter + import mimetools + import cStringIO + + out = cStringIO.StringIO() # output buffer for our message + htmlin = cStringIO.StringIO(html) + txtin = cStringIO.StringIO(text) + + writer = MimeWriter.MimeWriter(out) + # + # set up some basic headers... we put subject here + # because smtplib.sendmail expects it to be in the + # message body + # + writer.addheader("Subject", subject) + writer.addheader("MIME-Version", "1.0") + # + # start the multipart section of the message + # multipart/alternative seems to work better + # on some MUAs than multipart/mixed + # + writer.startmultipartbody("alternative") + writer.flushheaders() + # + # the plain text section + # + subpart = writer.nextpart() + subpart.addheader("Content-Transfer-Encoding", "quoted-printable") + #pout = subpart.startbody("text/plain", [("charset", 'us-ascii')]) + pout = subpart.startbody("text/plain", [("charset", 'utf-8')]) + mimetools.encode(txtin, pout, 'quoted-printable') + txtin.close() + # + # start the html subpart of the message + # + subpart = writer.nextpart() + subpart.addheader("Content-Transfer-Encoding", "quoted-printable") + txtin.close() + # + # start the html subpart of the message + # + subpart = writer.nextpart() + subpart.addheader("Content-Transfer-Encoding", "quoted-printable") + # + # returns us a file-ish object we can write to + # + #pout = subpart.startbody("text/html", [("charset", 'us-ascii')]) + pout = subpart.startbody("text/html", [("charset", 'utf-8')]) + mimetools.encode(htmlin, pout, 'quoted-printable') + htmlin.close() + # + # Now that we're done, close our writer and + # return the message body + # + writer.lastpart() + msg = out.getvalue() + out.close() + print msg + return msg + +def 
put_css_in_file(html_message, journal_name): + """ + """ + config_strings = get_xml_from_config(["screen"], journal_name) + try: + css_path = config_strings["screen"][0] + except: + register_exception(req=req, suffix="No css file for journal %s. Is this right?" % journal_name) + return + # todo: error handling on not found + css_file = urlopen('%s/%s' % (weburl, css_path)) + css = css_file.read() + css = make_full_paths_in_css(css, journal_name) + html_parted = html_message.split("") + if len(html_parted) > 1: + html = '%s%s' % (html_parted[0], + css, + html_parted[1]) + else: + html_parted = html_message.split("") + if len(html_parted) > 1: + html = '%s%s' % (html_parted[0], + css, + html_parted[1]) + else: + return "no html" + # todo: exception + + + return html + +def make_full_paths_in_css(css, journal_name): + """ + """ + url_pattern = re.compile('''url\(["']?\s*(?P\S*)\s*["']?\)''', re.DOTALL) + url_iter = url_pattern.finditer(css) + rel_to_full_path = {} + for url in url_iter: + url_string = url.group("url") + url_string = url_string.replace("\"", "") + url_string = url_string.replace("\'", "") + if url_string[:6] != "http://": + rel_to_full_path[url_string] = '"%s/img/%s/%s"' % (weburl, journal_name, url_string) + + for url in rel_to_full_path.keys(): + css = css.replace(url, rel_to_full_path[url]) + + return css + + + +#url(["']?(?P\S*)["']?) diff --git a/modules/webjournal/lib/webjournal_webinterface.py b/modules/webjournal/lib/webjournal_webinterface.py new file mode 100644 index 000000000..bc24b6878 --- /dev/null +++ b/modules/webjournal/lib/webjournal_webinterface.py @@ -0,0 +1,733 @@ +# -*- coding: utf-8 -*- +## $Id$ +## +## This file is part of CDS Invenio. +## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. 
+## +## CDS Invenio is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## CDS Invenio is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +"""WebJournal Web Interface.""" + +__revision__ = "$Id$" + +__lastupdated__ = """$Date$""" + +from mod_python import apache +from invenio.access_control_engine import acc_authorize_action +from invenio.config import weburl, webdir, cdslang, etcdir, \ + CFG_ACCESS_CONTROL_LEVEL_SITE, CFG_MISCUTIL_SMTP_HOST +from invenio.messages import gettext_set_language +from invenio.webpage import page +from invenio.webuser import getUid, page_not_authorized, isGuestUser +from invenio.webbasket import * +from invenio.webbasket_config import CFG_WEBBASKET_CATEGORIES, \ + CFG_WEBBASKET_ACTIONS +from invenio.urlutils import get_referer, redirect_to_url +from invenio.webinterface_handler import wash_urlargd, WebInterfaceDirectory + +from invenio.errorlib import register_exception +from webjournal_config import InvenioWebJournalNoArticleNumberError, \ + InvenioWebJournalNoArticleRuleError, \ + InvenioWebJournalNoIndexTemplateError, \ + InvenioWebJournalNoIssueNumberTagError, \ + InvenioWebJournalNoArticleNumberError, \ + InvenioWebJournalNoArticleTemplateError, \ + webjournal_missing_info_box, \ + webjournal_error_box + +import time +import os +import smtplib +from urllib2 import urlopen +from email import message_from_string +from xml.dom import minidom +from 
invenio.bibformat_engine import format_with_format_template, BibFormatObject +from invenio.search_engine import search_pattern + + +from invenio.webjournal_utils import get_recid_from_order, \ + get_recid_from_order_CERNBulletin, \ + parse_url_string, \ + get_xml_from_config, \ + please_login, \ + get_current_issue, \ + get_rule_string_from_rule_list, \ + get_monday_of_the_week, \ + createhtmlmail, \ + put_css_in_file + +class WebInterfaceJournalPages(WebInterfaceDirectory): + """Defines the set of /journal pages.""" + + _exports = ['', 'administrate', 'article', 'issue_control', 'search', 'alert', + 'feature_record'] + + def index(self, req, form): + """Index page.""" + argd = wash_urlargd(form, {'name': (str, ""), + 'issue': (str, ""), + 'category': (str, ""), + 'ln': (str, "")} + ) + # get / set url parameter + journal_name = "" + issue_number = "" + category = "" + req.journal_defaults = {} + try: + journal_name = argd['name'] + if journal_name == "": + raise KeyError + except KeyError: + register_exception(stream='warning', req=req, suffix="No Journal Name was provided.") + return webjournal_missing_info_box(req, title="Template not found", + msg_title="We don't know which journal you are looking for", + msg='''You were looking for a journal without providing a name. + Unfortunately we cannot know which journal you are looking for. + Below you have a selection of journals that are available on this server. 
+ If you should find your journal there, just click the link, + otherwise please contact the server admin and ask for existence + of the journal you are looking for.''') + try: + issue_number = argd['issue'] + if issue_number == "": + raise KeyError + except KeyError: + issue_number = get_current_issue(journal_name) + req.journal_defaults["issue"] = issue_number + try : + category = argd['category'] + except KeyError: + pass # optional parameter + try: + language = argd['ln'] + except KeyError: + language = "en" + req.journal_defaults["ln"] = "en" + # english is default + # get all strings you want from the config files + config_strings = get_xml_from_config(["index", "rule", "issue_number"], journal_name) + try: + try: + index_page_template = config_strings["index"][0] + except: + raise InvenioWebJournalNoIndexTemplateError(journal_name) + except InvenioWebJournalNoIndexTemplateError, e: + register_exception(req=req) + return webjournal_error_box(req, + "Main Page Template not found", + "Problem with the configuration for this journal", + "The system couldn't find the template for the main page of this journal. This is a mandatory file and thus indicates that the journal was setup wrong or produced an internal error. If you are neither admin nor developer there is nothing you can do at this point, but send an email request. We apologize for the inconvenience.") + index_page_template_path = 'webjournal/%s' % (index_page_template) + + # find current selected category from the list of rules in the config + rule_list = config_strings["rule"] + try: + if len(rule_list) == 0: + raise InvenioWebJournalNoArticleRuleError() + except InvenioWebJournalNoArticleRuleError, e: + register_exception(req=req) + return webjournal_error_box(req, + "No Articles", + "Problem with the configuration of this journal", + "The system couldn't find the definitions for different article kinds (e.g. News, Sports, etc.). 
If there is nothing defined, nothing can be shown and it thus indicates that there is either a problem with the setup of this journal or in the Software itself. There is nothing you can do at this moment. If you wish you can send an inquiry to the responsible developers. We apologize for the inconvenience.") + try: + try: + issue_number_tag = config_strings["issue_number"][0] + except KeyError: + raise InvenioWebJournalNoIssueNumberTagError(journal_name) + except InvenioWebJournalNoIssueNumberTagError: + register_exception(req=req) + return webjournal_error_box(req, + title="No Issues", + title_msg="Problem with the configuration of this journal", + msg="The system couldn't find a definition for an issue numbering system. Issue numbers conrol the date of the publication you are seing. This indicates that there is an error in the setup of this journal or the Software itself. There is nothing you can do at the moment. If you wish you can send an inquiry to the responsible developers. We apologize for the inconvenience.") + + current_category_in_list = 0 + i = 0 + if category != "": + for rule_string in rule_list: + category_from_config = rule_string.split(",")[0] + if category_from_config.lower() == category.lower(): + current_category_in_list = i + i+=1 + else: + # add the first category to the url string + req.journal_defaults["category"] = rule_list[0].split(",")[0] + # get the important values for the category from the config file + rule_string = rule_list[current_category_in_list].replace(" ", "") + category = rule_string.split(",")[0] + rule = rule_string.split(",")[1] + marc_datafield = rule.split(":")[0] + rule_match = rule.split(":")[1] + marc_tag = marc_datafield[:3] + marc_ind1 = (str(marc_datafield[3]) == "_") and " " or marc_datafield[3] + marc_ind2 = (str(marc_datafield[4]) == "_") and " " or marc_datafield[4] + marc_subfield = marc_datafield[5] + # create a marc record, containing category and issue number + temp_marc = ''' + 0 + + %s + + + %s + + ''' 
% (issue_number_tag[:3], + (issue_number_tag[3] == "_") and " " or issue_number_tag[3], + (issue_number_tag[4] == "_") and " " or issue_number_tag[4], + issue_number_tag[5], + issue_number, marc_tag, marc_ind1, + marc_ind2, marc_subfield, rule_match) + temp_marc = temp_marc.decode('utf-8').encode('utf-8') + + # create a record and get HTML back from bibformat + bfo = BibFormatObject(0, ln=language, xml_record=temp_marc, req=req) # pass 0 for rn, we don't need it + html_out = format_with_format_template(index_page_template_path, bfo)[0] + # done ;) + return html_out + + def article(self, req, form): + """ + """ + argd = wash_urlargd(form, {'name': (str, ""), + 'issue': (str, ""), + 'category': (str, ""), + 'number': (str, ""), + 'ln': (str, "")} + ) + + # get / set url parameter + journal_name = "" + issue_number = "" + category = "" + number = "" + req.journal_defaults = {} + try: + journal_name = argd['name'] + if journal_name == "": + raise KeyError + except KeyError: + register_exception(stream='warning', req=req, suffix="No Journal Name was provided.") + return webjournal_missing_info_box(req, title="Template not found", + msg_title="We don't know which journal you are looking for", + msg='''You were looking for a journal without providing a name. + Unfortunately we cannot know which journal you are looking for. + Below you have a selection of journals that are available on this server. 
+ If you should find your journal there, just click the link, + otherwise please contact the server admin and ask for existence + of the journal you are looking for.''') + try: + issue_number = argd['issue'] + if issue_number == "": + raise KeyError + except KeyError: + issue_number = get_current_issue(journal_name) + req.journal_defaults["issue"] = issue_number + try: + try: + number = argd['number'] + if number == "": + raise KeyError + except KeyError: + raise InvenioWebJournalNoArticleNumberError(journal_name) + except InvenioWebJournalNoArticleNumberError, e: + register_exception(req=req) + return webjournal_error_box(req, + title="No Article Number", + title_msg="We couldn't find the article you're looking for", + msg='''The system could not deduce the + article number you were looking for. This could have several + reasons. If you typed the adress directly to the browser, try + looking at the list of available journals + here. If you came to this page from a regular journal, then + this is most probably an error in the Software or the Archive + and there is nothing you can do at this point. If you wish you + can send an inquiry to the responsible developers. We apologize + for the inconvenience.''' % weburl) + try: + category = argd['category'] + except KeyError: + pass # optional parameter + try: + language = argd['ln'] + except KeyError: + language = "en" + req.journal_defaults["ln"] = "en" + config_strings = get_xml_from_config(["detailed", "rule"], journal_name) + # get the path to the format_template of this page + try: + try: + index_page_template = config_strings["detailed"][0] + except: + raise InvenioWebJournalNoArticleTemplateError(journal_name) + except InvenioWebJournalNoArticleTemplateError: + register_exception(req=req) + return webjournal_error_box(req, + "Article view Template not found", + "Problem with the configuration for this journal.", + "The system couldn't find the template for the article pages of this journal. 
This is a mandatory file and thus indicates that the journal was setup wrong or produced an internal error. If you are neither admin nor developer there is nothing you can do at this point, but send an email request. We apologize for the inconvenience.") + + index_page_template_path = 'webjournal/%s' % (index_page_template) + + # find current selected category from the list of rules in the config + rule_list = config_strings["rule"] + current_category_in_list = 0 + i = 0 + if category != "": + for rule_string in rule_list: + category_from_config = rule_string.split(",")[0] + if category_from_config.lower() == category.lower(): + current_category_in_list = i + i+=1 + # get the important values for the category from the config file + rule_string = rule_list[current_category_in_list].replace(" ", "") + rule = rule_string.split(",")[1] + + # recid = get_recid_from_order(number, rule, issue_number) + recid = get_recid_from_order_CERNBulletin(number, rule, issue_number) + # create a record and get HTML back from bibformat + bfo = BibFormatObject(recid, ln=language, req=req) + + html_out = format_with_format_template(index_page_template_path, + bfo)[0] + return html_out + + def administrate(self, req, form): + """Index page.""" + return "Not implemented yet." + + def feature_record(self, req, form): + """ + in the CERNBulletin used for "For the Eyes" section + """ + argd = wash_urlargd(form, {'name': (str, ""), + 'recid': (str, ""), + 'featured': (str, "false"), + 'url': (str, "") + }) + if argd['name'] == "": + return webjournal_missing_info_box(req, title="Journal not found", + msg_title="We don't know which journal you are looking for", + msg='''You were looking for a journal without providing a name. + Unfortunately we cannot know which journal you are looking for. + Below you have a selection of journals that are available on this server. 
+ If you should find your journal there, just click the link, + otherwise please contact the server admin and ask for existence + of the journal you are looking for.''') + else: + journal_name = argd['name'] + + # login + if acc_authorize_action(req, 'cfgwebjournal', name="%s" % journal_name)[0] != 0: + # todo: pass correct language + return please_login(req, journal_name, backlink='%s/journal/feature_record?name=%s' % (weburl, journal_name)) + + if argd['recid'] == "": + interface = ''' +
    + +

    Featured Record's ID:

    + +

    Link to the picture that should be displayed

    + +
    + +
    + ''' % (weburl, journal_name) + return page(title="Feature a record", body=interface) + else: + fptr = open('%s/webjournal/%s/featured_record' % (etcdir, journal_name), "w") + fptr.write(argd['recid']) + fptr.write('\n') + fptr.write(argd['url']) + fptr.close() + return page(title="Successfully featured record: %s" % argd['recid'], body="") + + def alert(self, req, form): + """Alert system.""" + argd = wash_urlargd(form, {'name': (str, ""), + 'sent': (str, "false"), + 'plainText': (str, u''), + 'htmlMail': (str, ""), + 'recipients': (str, "")}) + if argd['name'] == "": + return webjournal_missing_info_box(req, title="Journal not found", + msg_title="We don't know which journal you are looking for", + msg='''You were looking for a journal without providing a name. + Unfortunately we cannot know which journal you are looking for. + Below you have a selection of journals that are available on this server. + If you should find your journal there, just click the link, + otherwise please contact the server admin and ask for existence + of the journal you are looking for.''') + else: + journal_name = argd['name'] + config_strings = get_xml_from_config(["niceName", "niceURL"], journal_name) + try: + display_name = config_strings["niceName"][0] + except: + display_name = journal_name + try: + url = config_string["niceURL"][0] + except: + url = '%s/journal/%s' % (weburl, journal_name) + issue = get_current_issue(journal_name) + # login + if acc_authorize_action(req, 'cfgwebjournal', name="%s" % journal_name)[0] != 0: + # todo: pass correct language + return please_login(req, journal_name, backlink='%s/journal/alert?name=%s' % (weburl, journal_name)) + plain_text = u'''Dear Subscriber, + +The latest issue of the %s, no. %s, has been released. +You can access it at the following URL: +%s + +Best Wishes, +The %s team + +---- +Cher Abonné, + +Le nouveau numéro du %s, no. %s, vient de paraître. 
+Vous pouvez y accéder à cette adresse : +%s + +Bonne lecture, +L'équipe du %s +''' % (display_name, issue, url, display_name, + display_name, issue, url, display_name) + + plain_text = plain_text.encode('utf-8') + + if argd['sent'] == "false": + interface = ''' +
    + +

    Recipients:

    + +

    Plain Text Message:

    + +

    Send Homepage as html: + +

    +
    + +
    + ''' % (weburl, journal_name, plain_text) + return page(title="alert system", body=interface) + else: + plain_text = argd['plainText'] + + if argd['htmlMail'] == "html": + html_file = urlopen('%s/journal/?name=%s&ln=en' % (weburl, journal_name)) + html_string = html_file.read() + html_file.close() + html_string = put_css_in_file(html_string, journal_name) + else: + html_string = plain_text.replace("\n", "
    ") + #html_message = message_from_string(html_string) + + subject = "%s %s released!" % (display_name, issue) + message = createhtmlmail(html_string, plain_text, subject) + server = smtplib.SMTP("mailer.konnektiv.ch") + server.login('web52p9', 'getin2') + server.sendmail('gabriel.hase@konnektiv.ch', argd['recipients'], message) + server.quit() + + return page(title="Alert sent successfully!", body="") + + def search(self, req, form): + """ + Creates a temporary record containing all the information needed for + the search, meaning list of issue_numbers (timeframe), list of keywords, + list of categories to search in. In this way everything can be configured + globally in the config for the given webjournal and we can reuse the bibformat + for whatever search we want. + """ + argd = wash_urlargd(form, {'name': (str, ""), + 'category': (list, []), + 'issue': (list, []), + 'keyword': (str, ""), + 'ln': (str, cdslang)}) + if argd['name'] == "": + register_exception(stream='warning', + suffix="User tried to search without providing a journal name.") + return webjournal_missing_info_box(req, title="Journal not found", + msg_title="We don't know which journal you are looking for", + msg='''You were looking for a journal without providing a name. + Unfortunately we cannot know which journal you are looking for. + Below you have a selection of journals that are available on this server. 
+ If you should find your journal there, just click the link, + otherwise please contact the server admin and ask for existence + of the journal you are looking for.''') + else: + journal_name = argd['name'] + + config_strings = get_xml_from_config(["search", "issue_number", "rule"], journal_name) + try: + try: + search_page_template = config_strings["search"][0] + except: + raise InvenioWebJournalNoArticleTemplateError(journal_name) # todo: new exception + except InvenioWebJournalNoArticleTemplateError: + register_exception(req=req) + return webjournal_error_box(req, + "Search Page Template not found", + "Problem with the configuration for this journal.", + "The system couldn't find the template for the search result page of this journal. This is a mandatory file and thus indicates that the journal was setup wrong or produced an internal error. If you are neither admin nor developer there is nothing you can do at this point, but send an email request. We apologize for the inconvenience.") + search_page_template_path = 'webjournal/%s' % (search_page_template) + try: + try: + issue_number_tag = config_strings["issue_number"][0] + except KeyError: + raise InvenioWebJournalNoIssueNumberTagError(journal_name) + except InvenioWebJournalNoIssueNumberTagError: + register_exception(req=req) + return webjournal_error_box(req, + title="No Issues", + title_msg="Problem with the configuration of this journal", + msg="The system couldn't find a definition for an issue numbering system. Issue numbers conrol the date of the publication you are seing. This indicates that there is an error in the setup of this journal or the Software itself. There is nothing you can do at the moment. If you wish you can send an inquiry to the responsible developers. 
We apologize for the inconvenience.") + rule_list = config_strings["rule"] + try: + if len(rule_list) == 0: + raise InvenioWebJournalNoArticleRuleError() + except InvenioWebJournalNoArticleRuleError, e: + register_exception(req=req) + return webjournal_error_box(req, + "No searchable Articles", + "Problem with the configuration of this journal", + "The system couldn't find the definitions for different article kinds (e.g. News, Sports, etc.). If there is nothing defined, nothing can be shown and it thus indicates that there is either a problem with the setup of this journal or in the Software itself. There is nothing you can do at this moment. If you wish you can send an inquiry to the responsible developers. We apologize for the inconvenience.") + category_rules = [] + if argd['category'] == []: + # append all categories + for rule_string in rule_list: + marc = {} + marc["category"] = rule_string.split(",")[0] + rule = rule_string.split(",")[1] + marc_datafield = rule.split(":")[0] + marc["rule_match"] = rule.split(":")[1] + marc["marc_tag"] = marc_datafield[1:4] + marc["marc_ind1"] = (marc_datafield[4] == "_") and " " or marc_datafield[4] + marc["marc_ind2"] = (marc_datafield[5] == "_") and " " or marc_datafield[5] + marc["marc_subfield"] = marc_datafield[6] + category_rules.append(marc) + else: + # append only categories from the url param + for single_category in argd['category']: + rule_string = get_rule_string_from_rule_list(rule_list, single_category) + marc = {} + marc["category"] = rule_string.split(",")[0] + rule = rule_string.split(",")[1] + marc_datafield = rule.split(":")[0] + marc["rule_match"] = rule.split(":")[1] + marc["marc_tag"] = marc_datafield[1:4] + marc["marc_ind1"] = (marc_datafield[4] == "_") and " " or marc_datafield[4] + marc["marc_ind2"] = (marc_datafield[5] == "_") and " " or marc_datafield[5] + marc["marc_subfield"] = marc_datafield[6] + category_rules.append(marc) + + category_fields = "\n".join([''' + + %s + + ''' % 
(marc["marc_tag"], + marc["marc_ind1"], + marc["marc_ind2"], + marc["marc_subfield"], + marc["rule_match"]) for marc in category_rules]) + + issue_number_fields = "\n".join([''' + + %s + + ''' % (issue_number_tag[:3], + (issue_number_tag[3] == "_") and " " or issue_number_tag[3], + (issue_number_tag[4] == "_") and " " or issue_number_tag[4], + issue_number_tag[5], + issue_number) for issue_number in argd['issue']]) + + temp_marc = ''' + 0 + %s + %s + ''' % (issue_number_fields, category_fields) + + + # create a record and get HTML back from bibformat + bfo = BibFormatObject(0, ln=argd['ln'], xml_record=temp_marc, req=req) # pass 0 for rn, we don't need it + html_out = format_with_format_template(search_page_template_path, bfo)[0] + + #perform_request_search(cc="News Articles", p="families and 773__n:23/2007") + #cc = argd['category'] + #p = keyword + #for issue_number in argd['issue_number']: + # p += " and 773__n:%s" % issue_number + ## todo: issue number tag generic from config + #results = perform_request_search(cc=cc, p=p) + + return html_out + + def issue_control(self, req, form): + """ + page that allows full control over creating, backtracing, adding to, + removing from issues. + """ + argd = wash_urlargd(form, {'name': (str, ""), + 'add': (str, ""), + 'action_publish': (str, "cfg"), + 'issue_number': (list, [])} + ) + if argd['name'] == "": + return webjournal_missing_info_box(req, title="Journal not found", + msg_title="We don't know which journal you are looking for", + msg='''You were looking for a journal without providing a name. + Unfortunately we cannot know which journal you are looking for. + Below you have a selection of journals that are available on this server. 
+ If you should find your journal there, just click the link, + otherwise please contact the server admin and ask for existence + of the journal you are looking for.''') + else: + journal_name = argd['name'] + + action = argd['action_publish'] + issue_numbers = argd['issue_number'] + + if acc_authorize_action(req, 'cfgwebjournal', name="%s" % journal_name)[0] != 0: + # todo: pass correct language + return please_login(req, journal_name) + + if action == "cfg" or action == "Refresh": + active_issues = [] + if action == "Refresh": + active_issues = issue_numbers + try: + active_issues.remove("mm/yyyy") + except: + pass + from sets import Set + active_issues = list(Set(active_issues)) # avoid double entries + active_issues.sort() + else: + try: + issue_group = open('%s/webjournal/%s/issue_group' % (etcdir, + journal_name)).read() + except: + issue_group = "" + try: + current_issue = open('%s/webjournal/%s/current_issue' % (etcdir, + journal_name)).read() + except: + register_exception(stream='warning', req=req, + suffix="Couldn't find any current issue, if this is the first time for this journal this is fine.") + current_issue = "" + if issue_group != "": + issue_part = issue_group.split(" - ")[0] + year = issue_part.split("/")[1] + low_bound = issue_part.split("/")[0].split("-")[0] + high_bound = issue_part.split("/")[0].split("-")[1] + for i in range(int(low_bound), int(high_bound)+1): + active_issues.append("%s/%s" % (str(i), year)) + elif current_issue != "": + issue_part = current_issue.split(" - ")[0] + issue_number = issue_part.replace(" ", "") + active_issues.append(issue_number) + this_weeks_issue = time.strftime("%U/%Y", time.localtime()) + + output = ''' +
    +

    CERN eBulletin Publishing Interface

    +

    This interface gives you the possibilite to create + your current webjournal publication. Every checked + issue number will be in the current publication. Once + you've made your selection you can publish the new + issue by clicking the Publish button at the end.

    +
    + +
      +

      Active issues::..

      + %s +
      +

      This weeks issue::..

      +
    • %s
    • +

      Add custom issue

      + + +
      +
      + + +
    + ''' % (weburl, + journal_name, + "".join(['
  •  %s
  • ' % (issue, issue) for issue in active_issues]), + ' %s' % (this_weeks_issue, this_weeks_issue)) + + elif action == "Publish": + active_issues = issue_numbers + try: + active_issues.remove("mm/yyyy") + except: + pass + from sets import Set + active_issues = list(Set(active_issues)) # avoid double entries + active_issues.sort() + + file_issue_group = open('%s/webjournal/%s/issue_group' % (etcdir, + journal_name), "w") + file_current_issue = open('%s/webjournal/%s/current_issue' % (etcdir, + journal_name), "w") + + if len(active_issues) > 1: + low_bound = active_issues[0].split("/")[0] + high_bound = active_issues[len(active_issues)-1].split("/")[0] + year = active_issues[len(active_issues)-1].split("/")[1] + file_issue_group.write('%s-%s/%s - %s' % (low_bound, + high_bound, + year, + get_monday_of_the_week(high_bound, year))) + file_current_issue.write('%s/%s - %s' % (high_bound, + year, + get_monday_of_the_week(high_bound, year))) + elif len(active_issues) > 0: + issue_number = active_issues[0].split("/")[0] + year = active_issues[0].split("/")[1] + file_current_issue.write('%s/%s - %s' % (issue_number, + year, + get_monday_of_the_week(issue_number, year))) + else: + register_exception(stream='warning', req=req, + suffix='empty issue has been published.') + + file_current_issue.close() + file_issue_group.close() + + output = ''' +

    Congrats! You are a true publisher.

    +

    Your current journal has been published with the + following issue numbers:

    +
      + %s +
    +
    +

    If you need to make changes just go back here

    +

    To look at your newly creted bulletin, go here

    + ''' % ("".join(["
  • %s
  • " % issue for issue in active_issues]), + weburl + "/journal/issue_control?name=" + journal_name, + weburl + "/journal/?name=" + journal_name) + + return page(title="Publish System", body=output) + +if __name__ == "__main__": + index() \ No newline at end of file diff --git a/modules/Makefile.am b/modules/webjournal/lib/widgets/Makefile.am similarity index 76% copy from modules/Makefile.am copy to modules/webjournal/lib/widgets/Makefile.am index 1dd7a6696..f39687219 100644 --- a/modules/Makefile.am +++ b/modules/webjournal/lib/widgets/Makefile.am @@ -1,22 +1,25 @@ ## $Id$ - +## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
-SUBDIRS = bibclassify bibconvert bibedit bibharvest bibmatch bibsched bibindex bibrank bibupload bibformat elmsubmit miscutil webstyle websession webhelp webbasket webalert websearch websubmit webaccess webmessage webstat webcomment +pylibdir=$(libdir)/python/invenio/bibformat_elements + +pylib_DATA = bfe_webjournal_widget_weather.py bfe_webjournal_widget_seminars.py bfe_webjournal_widget_latestPhoto.py +EXTRA_DIST = $(pylib_DATA) $(tmp_DATA) -CLEANFILES = *~ +CLEANFILES = *~ *.tmp diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_latestPhoto.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_latestPhoto.py new file mode 100644 index 000000000..91637caaa --- /dev/null +++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_latestPhoto.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +from invenio.bibformat_engine import BibFormatObject +from invenio.search_engine import perform_request_search + +CDS_Photo_URL = "http://cdsweb.cern.ch/search?cc=Press+Office+Photo+Selection&as=1&rg=1&of=xm" +Recursion_Upper_Limit = 10 + +def format(bfo): + """ + """ + out = get_widget_HTML(bfo.lang, 1) + return out + +def escape_values(bfo): + """ + """ + return 0 + +def get_widget_HTML(language, number): + """ + """ + # limit the recursion + if int(number) > int(Recursion_Upper_Limit): + return "" + latest_photo_id = perform_request_search(cc='Press Office Photo Selection', rg=number, as=1, of='id') # todo: change cc='Press+Office+Photo+Selection' + try: + latest_photo_record = BibFormatObject(latest_photo_id[number - 1]) + except: + # todo: Exception, no photo in this selection + return "" + recid = latest_photo_record.control_field("001") + if language == "fr": + try: + title = latest_photo_record.fields('246_1a')[0] + except KeyError: + title = "" + else: + try: + title = latest_photo_record.fields('245__a')[0] + except KeyError: + # todo: exception, picture with no title + title = "" + # first try to get the images from dfs, this should be the format they 
are in! + icon_url = {} + i = 1 + dfs_images = latest_photo_record.fields('8567_') + for image_block in dfs_images: + try: + if image_block["y"] == "Icon": + if image_block["u"][:7] == "http://": + if image_block["8"] != "": + icon_url[int(image_block["8"])] = image_block["u"] + else: + try: + icon_url[i] = image_block["u"] + except: + # icon could not be added + pass + except: + # probably some key error, thats ok + pass + i+=1 + # todo: does this return the first? + try: + icon_tuple = icon_url.popitem() + icon_url = icon_tuple[1] + except: + # oh well, no dfs data... try to go for doc machine + doc_machine_images = latest_photo_record.fields('8564_') + # todo: implement parsing for external doc machine pages! + html_out = "" + if icon_url == "": + html_out = get_widget_HTML("en", number+1) + else: + # assemble the HTML + html_out = 'latest Photo%s' % ("http://test-multimedia-gallery.web.cern.ch/test-multimedia-gallery/PhotoGallery_Detailed.aspx?searchTerm=recid:" + recid + "&page=1&order=1", + icon_url, + title) +# +## +#Detail of the sensor from the first CMS half tracker inner barrel +# + return html_out +if __name__ == "__main__": + get_widget_HTML("en", 1) \ No newline at end of file diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py new file mode 100644 index 000000000..40d18a9ac --- /dev/null +++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_seminars.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python + +from invenio.config import cachedir +from urllib2 import urlopen +from xml.dom import minidom +import time + +Cached_Filename = "webjournal_widget_seminars.xml" +Indico_Seminar_Location = "http://indico.cern.ch/tools/export.py?fid=1l7&date=today&days=1&of=xml" +Update_Frequency = 3600 # in seconds + +def format(bfo): + """ + """ + out = get_widget_HTML() + return out + +def escape_values(bfo): + """ + """ + return 0 + +def get_widget_HTML(): + """ + Indico 
seminars of the day service + Gets seminars of the day from CERN Indico every 60 minutes and displays + them in a widget. + """ + try: + seminar_xml = minidom.parse('%s/%s' % (cachedir, Cached_Filename)) + except: + _update_seminars() + seminar_xml = minidom.parse('%s/%s' % (cachedir, Cached_Filename)) + try: + timestamp = seminar_xml.firstChild.getAttribute("time") + except: + timestamp = time.struct_time() + + last_update = time.mktime(time.strptime(timestamp, "%a, %d %b %Y %H:%M:%S %Z")) + now = time.mktime(time.gmtime()) + if last_update + Update_Frequency < now: + _update_seminars() + seminar_xml = minidom.parse('%s/%s' % (cachedir, Cached_Filename)) + + html = "" + seminars = seminar_xml.getElementsByTagName("seminar") + if len(seminars) == 0: + return "
  • no seminars today
  • " + for seminar in seminars: + html += "
  • " + try: + seminar_time = seminar.getElementsByTagName("start_time")[0].firstChild.toxml() + except: + seminar_time = "" + try: + category = seminar.getElementsByTagName("category")[0].firstChild.toxml() + except: + category = "Seminar" + html += '%s %s
    ' % (seminar_time, category) + try: + title = seminar.getElementsByTagName("title")[0].firstChild.toxml() + except: + title = "" + try: + url = seminar.getElementsByTagName("url")[0].firstChild.toxml() + except: + url = "#" + try: + speaker = seminar.getElementsByTagName("speaker")[0].firstChild.toxml() + except: + speaker = "" + if (title != ""): + html += '%s, %s
    ' % (url, title, speaker) + try: + room = seminar.getElementsByTagName("room")[0].firstChild.toxml() + except: + room = "" + html += room + + html += "
  • " + + return html.encode('utf-8') + +def _update_seminars(): + """ + helper function that gets the xml data source from CERN Indico and creates + a dedicated xml file in the cache for easy use in the widget. + """ + indico_xml = urlopen(Indico_Seminar_Location) + xml_file_handler = minidom.parseString(indico_xml.read()) + seminar_xml = ['' % time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()), ] + agenda_items = xml_file_handler.getElementsByTagName("agenda_item") + for item in agenda_items: + seminar_xml.extend(["", ]) + try: + start_time = item.getElementsByTagName("start_time")[0].firstChild.toxml() + except: + start_time = "" + seminar_xml.extend(["%s" % start_time, ]) + try: + category = item.getElementsByTagName("category")[0].firstChild.toxml() + category = category.split("/")[-1] + category = category.replace("&", "") + category = category.replace("nbsp;", "") + category = category.replace(" ", "") + except: + category = "" + seminar_xml.extend(["%s" % category, ]) + try: + title = item.getElementsByTagName("title")[0].firstChild.toxml() + except: + title = "" + seminar_xml.extend(["%s" % title, ]) + try: + url = item.getElementsByTagName("agenda_url")[0].firstChild.toxml() + except: + url = "#" + seminar_xml.extend(["%s" % url, ]) + try: + speaker = item.getElementsByTagName("speaker")[0].firstChild.toxml() + except: + speaker = "" + seminar_xml.extend(["%s" % speaker, ]) + try: + room = item.getElementsByTagName("room")[0].firstChild.toxml() + except: + room = "" + seminar_xml.extend(["%s" % room, ]) + seminar_xml.extend(["", ]) + seminar_xml.extend(["", ]) + # write the created file to cache + fptr = open("%s/%s" % (cachedir, Cached_Filename), "w") + fptr.write(("\n".join(seminar_xml)).encode('utf-8')) + fptr.close() + +if __name__ == "__main__": + get_widget_HTML() \ No newline at end of file diff --git a/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py b/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py new file 
mode 100644 index 000000000..f12d0c2e7 --- /dev/null +++ b/modules/webjournal/lib/widgets/bfe_webjournal_widget_weather.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python +""" +""" +from invenio import errorlib +from invenio.config import cachedir +import feedparser +import time +from urllib2 import urlopen +from invenio.errorlib import register_exception +import re + +Weather_Service = "Yahoo! Weather" +# rss feed on yahoo weather, check developer.yahoo.com/weather for details +RSS_Feed = "http://weather.yahooapis.com/forecastrss?p=SZXX0008&u=c" +# filename of the rss feed in cache +Cached_Filename = "webjournal_widget_YahooWeather.rss" +# filename of flat file in cache that holds the expire time +Expire_Time_Filename = "weather_RSS_expires" + +image_pattern = re.compile(''' + \S*)\s*/>* + ''' + ,re.DOTALL | re.IGNORECASE | re.VERBOSE) + +def format(bfo, title=""): + """ + wrapper function needed for BibFormat to route the widget HTML + """ + out = get_widget_HTML() + if title != "": + try: + weather_image_match = image_pattern.findall(out)[0] + weather_image = weather_image_match[1] + out = re.sub(image_pattern, "", out) + except: + register_exception(req=bfo.req) + weather_image = "" + weather_image = weather_image.replace("\"", "\'") + out = '''
    +

    %s

    +
    +
      + %s +
    + ''' % (weather_image, title, out) + return out + +def escape_values(bfo): + """ + """ + return 0 + +def get_widget_HTML(): + """ + weather forecast using Yahoo! Weather service + we check and store the "expires" data from the rss feed to decide when + an update is needed. + there always resides a cached version in cds cachedir along with a flat + file that indicates the time when the feed expires. + """ + try: + weather_feed = feedparser.parse('%s/%s' % (cachedir, Cached_Filename)) + except: + _update_feed() + weather_feed = feedparser.parse('%s/%s' % (cachedir, Cached_Filename)) + + now_in_gmt = time.gmtime() + now_time_string = time.strftime( "%a, %d %b %Y %H:%M:%S GMT", now_in_gmt) + try: + expire_time_string = open('%s/%s' (cachedir, Expire_Time_Filename)).read() + expire_time = time.strptime(open(Expire_Time_Filename).read(), "%a, %d %b %Y %H:%M:%S %Z") + #expire_time['tm_isdt'] = 0 + expire_in_seconds = time.mktime(expire_time) + now_in_seconds = time.mktime(now_in_gmt) + diff = time.mktime(expire_time) - time.mktime(now_in_gmt) + except: + diff = -1 + if diff < 0: + _update_feed() + weather_feed = feedparser.parse('%s/%s' % (cachedir, Cached_Filename)) + + # construct the HTML + html = weather_feed.entries[0]['summary'] + + return html + + +def _update_feed(): + """ + helper function that updates the feed by copying the new rss file to the + cache dir and resetting the time string on the expireTime flat file + """ + feed = urlopen(RSS_Feed) + cached_file = open('%s/%s' % (cachedir, Cached_Filename), 'w') + cached_file.write(feed.read()) + cached_file.close() + feed_data = feedparser.parse(RSS_Feed) + expire_time = feed_data.headers['expires'] + expire_file = open('%s/%s' % (cachedir, Expire_Time_Filename), 'w') + expire_file.write(expire_time) + expire_file.close() + +if __name__ == "__main__": + from invenio.bibformat_engine import BibFormatObject + myrec = BibFormatObject(7) + format(myrec) \ No newline at end of file diff --git 
a/modules/webstyle/lib/webinterface_layout.py b/modules/webstyle/lib/webinterface_layout.py index cad20b984..e32aa5ec0 100644 --- a/modules/webstyle/lib/webinterface_layout.py +++ b/modules/webstyle/lib/webinterface_layout.py @@ -1,102 +1,103 @@ # -*- coding: utf-8 -*- ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Global organisation of the application's URLs. This module binds together CDS Invenio's modules and maps them to their corresponding URLs (ie, /search to the websearch modules,...) 
""" __revision__ = \ "$Id$" from invenio.webinterface_handler import create_handler from invenio.websearch_webinterface import WebInterfaceSearchInterfacePages, \ WebInterfaceRSSFeedServicePages from invenio.websubmit_webinterface import websubmit_legacy_getfile, \ WebInterfaceSubmitPages from invenio.websession_webinterface import WebInterfaceYourAccountPages, \ WebInterfaceYourGroupsPages from invenio.webalert_webinterface import WebInterfaceYourAlertsPages from invenio.webbasket_webinterface import WebInterfaceYourBasketsPages from invenio.webcomment_webinterface import WebInterfaceCommentsPages from invenio.webmessage_webinterface import WebInterfaceYourMessagesPages from invenio.errorlib_webinterface import WebInterfaceErrorPages from invenio.oai_repository_webinterface import WebInterfaceOAIProviderPages from invenio.webstat_webinterface import WebInterfaceStatsPages -#from invenio.webjournal_webinterface import WebInterfaceJournalPages + +from invenio.webjournal_webinterface import WebInterfaceJournalPages from invenio.webdoc_webinterface import WebInterfaceDocumentationPages class WebInterfaceInvenio(WebInterfaceSearchInterfacePages): """ The global URL layout is composed of the search API plus all the other modules.""" _exports = WebInterfaceSearchInterfacePages._exports + [ 'youraccount', 'youralerts', 'yourbaskets', 'yourmessages', 'yourgroups', 'comments', 'error', 'oai2d', ('oai2d.py', 'oai2d'), ('getfile.py', 'getfile'), 'submit', 'rss', 'stats', 'journal', 'help' ] def __init__(self): self.getfile = websubmit_legacy_getfile return submit = WebInterfaceSubmitPages() youraccount = WebInterfaceYourAccountPages() youralerts = WebInterfaceYourAlertsPages() yourbaskets = WebInterfaceYourBasketsPages() yourmessages = WebInterfaceYourMessagesPages() yourgroups = WebInterfaceYourGroupsPages() comments = WebInterfaceCommentsPages() error = WebInterfaceErrorPages() oai2d = WebInterfaceOAIProviderPages() rss = WebInterfaceRSSFeedServicePages() stats = 
WebInterfaceStatsPages() - #journal = WebInterfaceJournalPages() + journal = WebInterfaceJournalPages() help = WebInterfaceDocumentationPages() # This creates the 'handler' function, which will be invoked directly # by mod_python. handler = create_handler(WebInterfaceInvenio())