diff --git a/modules/bibformat/lib/bibformat_engine.py b/modules/bibformat/lib/bibformat_engine.py index 3ea88eb46..fa098f95d 100644 --- a/modules/bibformat/lib/bibformat_engine.py +++ b/modules/bibformat/lib/bibformat_engine.py @@ -1,1611 +1,1611 @@ # -*- coding: utf-8 -*- +## ## $Id$ -## Bibformt engine. Format XML Marc record using specified format. - +## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Formats a single XML Marc record using specified format. There is no API for the engine. Instead use bibformat.py. 
SEE: bibformat.py, bibformat_utils.py """ import re import sys import os import inspect import traceback import zlib from invenio.errorlib import register_errors, get_msgs_for_code_list from invenio.config import * from invenio.bibrecord import create_record, record_get_field_instances, record_get_field_value, record_get_field_values from invenio.dbquery import run_sql from invenio.messages import language_list_long, wash_language from invenio import bibformat_dblayer from invenio.bibformat_config import format_template_extension, format_output_extension, templates_path, elements_path, outputs_path, elements_import_path from bibformat_utils import record_get_xml from xml.dom import minidom #Remove when call_old_bibformat is removed __lastupdated__ = """$Date$""" #Cache for data we have allready read and parsed format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} kb_mappings_cache = {} cdslangs = language_list_long() #Regular expression for finding ... tag in format templates pattern_lang = re.compile(r''' #closing start tag (?P.*?) #anything but the next group (greedy) () #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Builds regular expression for finding each known language in tags ln_pattern_text = r"<(" for lang in cdslangs: ln_pattern_text += lang[0] +r"|" ln_pattern_text = ln_pattern_text.rstrip(r"|") ln_pattern_text += r")>(.*?)" ln_pattern = re.compile(ln_pattern_text) #Regular expression for finding tag in format templates pattern_format_template_name = re.compile(r''' #closing start tag (?P.*?) #name value. any char that is not end tag ()(\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Regular expression for finding tag in format templates pattern_format_template_desc = re.compile(r''' #closing start tag (?P.*?) #description value. any char that is not end tag (\n)? 
#end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Regular expression for finding tags in format templates pattern_tag = re.compile(r''' [^/\s]+) #any char but a space or slash \s* #any number of spaces (?P(\s* #params here (?P([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s) =\s* #equality: = followed by any number of spaces (?P[\'"]) #one of the separators (?P.*?) #param value: any chars that is not a separator like previous one (?P=sep) #same separator as starting one )*) #many params \s* #any number of spaces (/)?> #end of the tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Regular expression for finding params inside tags in format templates pattern_function_params = re.compile(''' (?P([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s) =\s* # Equality: = followed by any number of spaces (?P[\'"]) # One of the separators (?P.*?) # Param value: any chars that is not a separator like previous one (?P=sep) # Same separator as starting one ''', re.VERBOSE | re.DOTALL ) #Regular expression for finding format elements "params" attributes (defined by @param) pattern_format_element_params = re.compile(''' @param\s* # Begins with @param keyword followed by space(s) (?P[^\s=]*)\s* # A single keyword, and then space(s) #(=\s*(?P[\'"]) # Equality, space(s) and then one of the separators #(?P.*?) # Default value: any chars that is not a separator like previous one #(?P=sep) # Same separator as starting one #)?\s* # Default value for param is optional. 
Followed by space(s) (?P.*) # Any text that is not end of line (thanks to MULTILINE parameter) ''', re.VERBOSE | re.MULTILINE) #Regular expression for finding format elements "see also" attribute (defined by @see) pattern_format_element_seealso = re.compile('''@see\s*(?P.*)''', re.VERBOSE | re.MULTILINE) #Regular expression for finding 2 expressions in quotes, separated by comma (as in template("1st","2nd") ) #Used when parsing output formats ## pattern_parse_tuple_in_quotes = re.compile(''' ## (?P[\'"]) ## (?P.*) ## (?P=sep1) ## \s*,\s* ## (?P[\'"]) ## (?P.*) ## (?P=sep2) ## ''', re.VERBOSE | re.MULTILINE) def call_old_bibformat(recID, format="HD"): """ FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE Calls BibFormat for the record RECID in the desired output format FORMAT. Note: this functions always try to return HTML, so when bibformat returns XML with embedded HTML format inside the tag FMT $g, as is suitable for prestoring output formats, we perform un-XML-izing here in order to return HTML body only. 
""" # look for formatted notice existence: query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it decompress = zlib.decompress return "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: out = "" pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % bindir, "otype=%s" % format], 'rw') #pipe_input.write(print_record(recID, "xm")) pipe_input.write(record_get_xml(recID, "xm")) pipe_input.close() bibformat_output = pipe_output.read() pipe_output.close() pipe_error.close() if bibformat_output.startswith(""): dom = minidom.parseString(bibformat_output) for e in dom.getElementsByTagName('subfield'): if e.getAttribute('code') == 'g': for t in e.childNodes: out += t.data.encode('utf-8') else: out = bibformat_output return out def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=[], xml_record=None, uid=None): """ Formats a record given output format. Main entry function of bibformat engine. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. You can either specify an record ID to format, or give its xml representation. if 'xml_record' != None, then use it instead of recID. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. 
@param recID the ID of record to format @param of an output format code (or short identifier for the output format) @param ln the language to use to format the record @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @param search_pattern list of strings representing the user request in web interface @param xml_record an xml string representing the record to format @param uid the user id of the person who will view the formatted page @return formatted record """ errors_ = [] # Temporary workflow (during migration of formats): # Call new BibFormat # But if format not found for new BibFormat, then call old BibFormat #Create a BibFormat Object to pass that contain record and context bfo = BibFormatObject(recID, ln, search_pattern, xml_record, uid) #Find out which format template to use based on record and output format. template = decide_format_template(bfo, of) ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### path = "%s%s%s" % (templates_path, os.sep, template) if template == None or not os.access(path, os.R_OK): # template not found in new BibFormat. Call old one if php: return call_old_bibformat(recID, format=of) ############################# END ################################## error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_TEMPLATE_FOUND", of)], - file='error', ln=cdslang) + stream='error', ln=cdslang) errors_.append(error) if verbose == 0: register_errors(error, 'error') elif verbose > 5: return error[0][1] return "" #Format with template (out, errors) = format_with_format_template(template, bfo, verbose) errors_.extend(errors) return out def decide_format_template(bfo, of): """ Returns the format template name that should be used for formatting given output format and BibFormatObject. Look at of rules, and take the first matching one. 
If no rule matches, returns None To match we ignore lettercase and spaces before and after value of rule and value of record @param bfo a BibFormatObject @param of the code of the output format to use """ output_format = get_output_format(of) for rule in output_format['rules']: value = bfo.field(rule['field']).strip()#Remove spaces pattern = rule['value'].strip() #Remove spaces if re.match(pattern, value, re.IGNORECASE) != None: return rule['template'] template = output_format['default'] if template != '': return template else: return None def format_with_format_template(format_template_filename, bfo, verbose=0, format_template_code=None): """ Format a record given a format template. Also returns errors Returns a formatted version of the record represented by bfo, in the language specified in bfo, and with the specified format template. Parameter format_template_filename will be ignored if format_template_code is provided. This allows to preview format code without having to save file on disk @param format_template_filename the dilename of a format template @param bfo the object containing parameters for the current formatting @param format_template_code if not empty, use code as template instead of reading format_template_filename (used for previews) @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (formatted text, errors) """ errors_ = [] if format_template_code != None: format_content = str(format_template_code) else: format_content = get_format_template(format_template_filename)['code'] localized_format = filter_languages(format_content, bfo.lang) (evaluated_format, errors) = eval_format_template_elements(localized_format, bfo, verbose) errors_ = errors return (evaluated_format, errors) def eval_format_template_elements(format_template, bfo, verbose=0): """ Evalutes the format elements of the given template and replace each element with its value. 
Also returns errors. Prepare the format template content so that we can directly replace the marc code by their value. This implies: 1) Look for special tags 2) replace special tags by their evaluation @param format_template the format template code @param bfo the object containing parameters for the current formatting @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (result, errors) """ errors_ = [] #First define insert_element_code(match), used in re.sub() function def insert_element_code(match): """ Analyses 'match', interpret the corresponding code, and return the result of the evaluation. Called by substitution in 'eval_format_template_elements(...)' @param match a match object corresponding to the special tag that must be interpreted """ function_name = match.group("function_name") format_element = get_format_element(function_name, verbose) params = {} #look for function parameters given in format template code all_params = match.group('params') if all_params != None: function_params_iterator = pattern_function_params.finditer(all_params) for param_match in function_params_iterator: name = param_match.group('param') value = param_match.group('value') params[name] = value #Evaluate element with params and return (Do not return errors) (result, errors) = eval_format_element(format_element, bfo, params, verbose) errors_ = errors return result #Substitute special tags in the format by our own text. #Special tags have the form format = pattern_tag.sub(insert_element_code, format_template) return (format, errors_) def eval_format_element(format_element, bfo, parameters={}, verbose=0): """ Returns the result of the evaluation of the given format element name, with given BibFormatObject and parameters. Also returns the errors of the evaluation. 
@param format_element a format element structure as returned by get_format_element @param bfo a BibFormatObject used for formatting @param parameters a dict of parameters to be used for formatting. Key is parameter and value is value of parameter @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (result, errors) """ errors = [] #Load special values given as parameters prefix = parameters.get('prefix', "") suffix = parameters.get('suffix', "") default_value = parameters.get('default', "") #3 possible cases: #a) format element file is found: we execute it #b) format element file is not found, but exist in tag table (e.g. bfe_isbn) #c) format element is totally unknown. Do nothing or report error if format_element != None and format_element['type'] == "python": #a) #We found an element with the tag name, of type "python" #Prepare a dict 'params' to pass as parameter to 'format' function of element params = {} #look for parameters defined in format element #fill them with specified default values and values #given as parameters for param in format_element['attrs']['params']: name = param['name'] default = param['default'] params[name] = parameters.get(name, default) #Add BibFormatObject params['bfo'] = bfo #execute function with given parameters and return result. 
output_text = "" function = format_element['code'] try: output_text = apply(function, (), params) except Exception, e: output_text = "" name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT", name, str(params)) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >=5: tb = sys.exc_info()[2] - error_string = get_msgs_for_code_list(error, file='error', ln=cdslang) + error_string = get_msgs_for_code_list(error, stream='error', ln=cdslang) stack = traceback.format_exception(Exception, e, tb, limit=None) output_text = ''+error_string[0][1] + "".join(stack) +' ' if output_text == None: output_text = "" else: output_text = str(output_text) #Add prefix and suffix if they have been given as parameters and if #the evaluation of element is not empty if output_text.strip() != "": output_text = prefix + output_text + suffix #Add the default value if output_text is empty if output_text == "": output_text = default_value return (output_text, errors) elif format_element != None and format_element['type'] =="field": #b) #We have not found an element in files that has the tag name. 
Then look for it #in the table "tag" # # # #Load special values given as parameters separator = parameters.get('separator ', "") nbMax = parameters.get('nbMax', "") #Get the fields tags that have to be printed tags = format_element['attrs']['tags'] output_text = [] #Get values corresponding to tags for tag in tags: p_tag = parse_tag(tag) values = record_get_field_values(bfo.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if len(values)>0 and isinstance(values[0], dict):#flatten dict to its values only values_list = map(lambda x: x.values(), values) #output_text.extend(values) for values in values_list: output_text.extend(values) else: output_text.extend(values) if nbMax != "": try: nbMax = int(nbMax) output_text = output_text[:nbMax] except: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_NBMAX_NOT_INT", name) errors.append(error) if verbose < 5: register_errors(error, 'error') elif verbose >=5: - error_string = get_msgs_for_code_list(error, file='error', ln=cdslang) + error_string = get_msgs_for_code_list(error, stream='error', ln=cdslang) output_text = output_text.append(error_string[0][1]) #Add prefix and suffix if they have been given as parameters and if #the evaluation of element is not empty. #If evaluation is empty string, return default value if it exists. Else return empty string if ("".join(output_text)).strip() != "": return (prefix + separator.join(output_text) + suffix, errors) else: #Return default value return (default_value, errors) else: #c) Element is unknown error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", format_element)], - file='error', ln=cdslang) + stream='error', ln=cdslang) errors.append(error) if verbose < 5: register_errors(error, 'error') return ("", errors) elif verbose >=5: if verbose >= 9: sys.exit(error[0][1]) return (''+error[0][1]+'', errors) def filter_languages(format_template, ln='en'): """ Filters the language tags that do not correspond to the specified language. 
@param format_template the format template code @param ln the language that is NOT filtered out from the template @return the format template with unnecessary languages filtered out """ #First define search_lang_tag(match) and clean_language_tag(match), used #in re.sub() function def search_lang_tag(match): """ Searches for the ... tag and remove inner localized tags such as , , that are not current_lang. If current_lang cannot be found inside ... , try to use 'cdslang' @param match a match object corresponding to the special tag that must be interpreted """ current_lang = ln def clean_language_tag(match): """ Return tag text content if tag language of match is output language. Called by substitution in 'filter_languages(...)' @param match a match object corresponding to the special tag that must be interpreted """ if match.group(1) == current_lang: return match.group(2) else: return "" #End of clean_language_tag lang_tag_content = match.group("langs") #Try to find tag with current lang. If it does not exists, then current_lang #becomes cdslang until the end of this replace pattern_current_lang = re.compile(r"<"+current_lang+"\s*>(.*?)") if re.search(pattern_current_lang, lang_tag_content) == None: current_lang = cdslang cleaned_lang_tag = ln_pattern.sub(clean_language_tag, lang_tag_content) return cleaned_lang_tag #End of search_lang_tag filtered_format_template = pattern_lang.sub(search_lang_tag, format_template) return filtered_format_template def parse_tag(tag): """ Parse a marc code and decompose it in a table with: 0-tag 1-indicator1 2-indicator2 3-subfield The first 3 chars always correspond to tag. The indicators are optional. However they must both be indicated, or both ommitted. If indicators are ommitted or indicated with underscore '_', they mean "No indicator". The subfield is optional. It can optionally be preceded by a dot '.' 
or '$$' or '$' Any of the chars can be replaced by wildcard % THE FUNCTION DOES NOT CHECK WELLFORMNESS OF 'tag' Any empty chars is not considered For example: >> parse_tag('245COc') = ['245', 'C', 'O', 'c'] >> parse_tag('245C_c') = ['245', 'C', '', 'c'] >> parse_tag('245__c') = ['245', '', '', 'c'] >> parse_tag('245__$$c') = ['245', '', '', 'c'] >> parse_tag('245__$c') = ['245', '', '', 'c'] >> parse_tag('245 $c') = ['245', '', '', 'c'] >> parse_tag('245 $$c') = ['245', '', '', 'c'] >> parse_tag('245__.c') = ['245', '', '', 'c'] >> parse_tag('245 .c') = ['245', '', '', 'c'] >> parse_tag('245C_$c') = ['245', 'C', '', 'c'] >> parse_tag('245CO$$c') = ['245', 'C', 'O', 'c'] >> parse_tag('245C_.c') = ['245', 'C', '', 'c'] >> parse_tag('245$c') = ['245', '', '', 'c'] >> parse_tag('245.c') = ['245', '', '', 'c'] >> parse_tag('245$$c') = ['245', '', '', 'c'] >> parse_tag('245__%') = ['245', '', '', ''] >> parse_tag('245__$$%') = ['245', '', '', ''] >> parse_tag('245__$%') = ['245', '', '', ''] >> parse_tag('245 $%') = ['245', '', '', ''] >> parse_tag('245 $$%') = ['245', '', '', ''] >> parse_tag('245$%') = ['245', '', '', ''] >> parse_tag('245.%') = ['245', '', '', ''] >> parse_tag('245$$%') = ['245', '', '', ''] >> parse_tag('2%5$$a') = ['2%5', '', '', 'a'] """ p_tag = ['', '', '', ''] tag = tag.replace(" ", "") #Remove empty characters tag = tag.replace("$", "") #Remove $ characters tag = tag.replace(".", "") #Remove . 
characters #tag = tag.replace("_", "") #Remove _ characters p_tag[0] = tag[0:3] #tag if len(tag) == 4: p_tag[3] = tag[3] #subfield elif len(tag) == 5: ind1 = tag[3]#indicator 1 if ind1 != "_": p_tag[1] = ind1 ind2 = tag[4]#indicator 2 if ind2 != "_": p_tag[2] = ind2 elif len(tag) == 6: p_tag[3] = tag[5]#subfield ind1 = tag[3]#indicator 1 if ind1 != "_": p_tag[1] = ind1 ind2 = tag[4]#indicator 2 if ind2 != "_": p_tag[2] = ind2 return p_tag def get_format_template(filename, with_attributes=False): """ Returns the structured content of the given formate template. if 'with_attributes' is True, returns the name and description. Else 'attrs' is not returned as key in dictionary (it might, if it has already been loaded previously) {'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} } @param filename the filename of an format template @param with_attributes if True, fetch the attributes (names and description) for format' @return strucured content of format template """ #Get from cache whenever possible global format_templates_cache if not filename.endswith("."+format_template_extension): return None if format_templates_cache.has_key(filename): #If we must return with attributes and template exist in cache with attributes #then return cache. 
Else reload with attributes if with_attributes == True and format_templates_cache[filename].has_key('attrs'): return format_templates_cache[filename] format_template = {'code':""} try: path = "%s%s%s" % (templates_path, os.sep, filename) format_file = open(path) format_content = format_file.read() format_file.close() #Load format template code #Remove name and description code_and_description = pattern_format_template_name.sub("", format_content) code = pattern_format_template_desc.sub("", code_and_description) # Escape % chars in code (because we will use python formatting capabilities) format_template['code'] = code except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], - file='error', ln=cdslang) + stream='error', ln=cdslang) register_errors(errors, 'error') #Save attributes if necessary if with_attributes: format_template['attrs'] = get_format_template_attrs(filename) #cache and return format_templates_cache[filename] = format_template return format_template def get_format_templates(with_attributes=False): """ Returns the list of all format templates if 'with_attributes' is True, returns the name and description. Else 'attrs' is not returned as key in each dictionary (it might, if it has already been loaded previously) [{'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} }, ... } @param with_attributes if True, fetch the attributes (names and description) for formats """ format_templates = {} files = os.listdir(templates_path) for filename in files: if filename.endswith("."+format_template_extension): format_templates[filename] = get_format_template(filename, with_attributes) return format_templates def get_format_template_attrs(filename): """ Returns the attributes of the format template with given filename The attributes are {'name', 'description'} Caution: the function does not check that path exists or that the format element is valid. 
@param the path to a format element """ attrs = {} attrs['name'] = "" attrs['description'] = "" try: template_file = open("%s%s%s"%(templates_path, os.sep, filename)) code = template_file.read() template_file.close() match = pattern_format_template_name.search(code) if match != None: attrs['name'] = match.group('name') else: attrs['name'] = filename match = pattern_format_template_desc.search(code) if match != None: attrs['description'] = match.group('desc').rstrip('.') except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], - file='error', ln=cdslang) + stream='error', ln=cdslang) register_errors(errors, 'error') attrs['name'] = filename return attrs def get_format_element(element_name, verbose=0, with_built_in_params=False): """ Returns the format element structured content. Return None if element cannot be loaded (file not found, not readable or invalid) The returned structure is {'attrs': {some attributes in dict. See get_format_element_attrs_from_*} 'code': the_function_code, 'type':"field" or "python" depending if element is defined in file or table} @param element_name the name of the format element to load @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @param with_built_in_params if True, load the parameters built in all elements @return a dictionary with format element attributes """ #Get from cache whenever possible global format_elements_cache #Resolve filename and prepare 'name' as key for the cache filename = resolve_format_element_filename(element_name) if filename != None: name = filename.upper() else: name = element_name.upper() if format_elements_cache.has_key(name): element = format_elements_cache[name] if with_built_in_params == False or (with_built_in_params == True and element['attrs'].has_key('builtin_params') ): return element if filename == None: #element is maybe in tag table if 
bibformat_dblayer.tag_exists_for_name(element_name): format_element = {'attrs': get_format_element_attrs_from_table(element_name, with_built_in_params), 'code':None, 'type':"field"} #Cache and returns format_elements_cache[name] = format_element return format_element else: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND", element_name)], - file='error', ln=cdslang) + stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >=5: sys.stderr.write(errors[0][1]) return None else: format_element = {} module_name = filename if module_name.endswith(".py"): module_name = module_name[:-3] try: module = __import__(elements_import_path+"."+module_name) #Load last module in import path #For eg. load bibformat_elements in invenio.elements.bibformat_element #Used to keep flexibility regarding where elements directory is (for eg. test cases) components = elements_import_path.split(".") for comp in components[1:]: module = getattr(module, comp) function_format = module.__dict__[module_name].format format_element['code'] = function_format format_element['attrs'] = get_format_element_attrs_from_function(function_format, element_name, with_built_in_params) format_element['type'] = "python" #cache and return format_elements_cache[name] = format_element return format_element except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND", element_name)], - file='error', ln=cdslang) + stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(str(e)) sys.stderr.write(errors[0][1]) if verbose >= 7: raise e return None def get_format_elements(with_built_in_params=False): """ Returns the list of format elements attributes as dictionary structure Elements declared in files have priority over element declared in 'tag' table The returned object has this format: {element_name1: {'attrs': {'description':..., 'seealso':... 
def get_format_element_attrs_from_function(function, element_name, with_built_in_params=False):
    """
    Returns the attributes of the format element implemented by 'function'.

    The attributes are read from the signature of the function
    (parameter names and default values) and from its docstring
    (description, '@param' and '@see' annotations).

    The returned structure is:
    {'name' : "name of element", #'element_name' in upper case, spaces replaced by '_'
     'description': "a string description of the element",
     'seealso' : ["element_1.py", "element_2.py", ...], #a list of related elements
     'params': [{'name':"param_name", #a list of parameters for this element (except 'bfo'),
                 'default':"default value", #in the order of the function signature
                 'description': "a description"}, ...],
     'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind
                         'default':"default value",
                         'description': "a description"}, ...],
    }
    The 'builtin_params' key is only present when 'with_built_in_params' is true.

    @param function the formatting function of a format element
    @param element_name the name of the element
    @param with_built_in_params if True, load the parameters built in all elements
    @return a dict of attributes, as described above
    """
    attrs = {'description': "",
             'name': element_name.replace(" ", "_").upper(),
             'seealso': []}

    docstring = function.__doc__
    has_docstring = isinstance(docstring, str)

    if has_docstring:
        #The description is the text that precedes the first @param or @see
        description = docstring.split("@param")[0].split("@see")[0]
        attrs['description'] = description.strip().rstrip('.')

        #Look for @see in docstring
        match = pattern_format_element_seealso.search(docstring)
        if match is not None:
            related = match.group('see').rstrip('.').split(",")
            attrs['seealso'] = [element.strip() for element in related]

    #Look for parameters in function definition.
    #getargspec() has been removed from recent versions of 'inspect':
    #prefer getfullargspec() when it exists. Both return the argument
    #names at index 0 and the default values at index 3.
    getargspec = getattr(inspect, 'getfullargspec', None)
    if getargspec is None:
        getargspec = inspect.getargspec
    argspec = getargspec(function)
    args = list(argspec[0])
    defaults = list(argspec[3] or ())

    #Default values always belong to the last len(defaults) arguments
    nb_without_default = len(args) - len(defaults)

    params = {}
    ordered_names = [] #Keeps the order of the signature
    for position, arg in enumerate(args):
        if arg == "bfo":
            #Don't keep this as parameter. It is hidden to users,
            #and exists in all elements of this kind
            continue
        default = None
        if position >= nb_without_default:
            default = defaults[position - nb_without_default]
        if default is None:
            #In case no check is made inside element, we prefer to
            #print "" (nothing) than None in output
            default = ""
        params[arg] = {'name': arg,
                       'default': default,
                       'description': "(no description provided)"}
        ordered_names.append(arg)

    if has_docstring:
        #Look for @param descriptions in docstring.
        #Add description to existing parameters in params dict
        for match in pattern_format_element_params.finditer(docstring):
            name = match.group('name')
            if name in params:
                params[name]['description'] = match.group('desc').rstrip('.')

    attrs['params'] = [params[name] for name in ordered_names]

    #Load built-in parameters if necessary
    if with_built_in_params:
        attrs['builtin_params'] = [
            {'name': "prefix",
             'default': "",
             'description': "A prefix printed only if the record has a value for this element"},
            {'name': "suffix",
             'default': "",
             'description': "A suffix printed only if the record has a value for this element"},
            {'name': "default",
             'default': "",
             'description': "A default value printed if the record has no value for this element"},
            ]

    return attrs
Always empty in this case 'params': [], #a list of parameters for this element. Always empty in this case 'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind 'default':"default value", 'description': "a description"}, ...], 'tags':["950.1", 203.a] #the list of tags printed by this element } @param element_name an element name in database @param element_name the name of the element @param with_built_in_params if True, load the parameters built in all elements """ attrs = {} tags = bibformat_dblayer.get_tags_from_name(element_name) field_label = "field" if len(tags)>1: field_label = "fields" attrs['description'] = "Prints %s %s of the record" % (field_label, ", ".join(tags)) attrs['name'] = element_name.replace(" ", "_").upper() attrs['seealso'] = [] attrs['params'] = [] attrs['tags'] = tags #Load built-in parameters if necessary if with_built_in_params == True: builtin_params = [] #Add 'prefix' parameter param_prefix = {} param_prefix['name'] = "prefix" param_prefix['default'] = "" param_prefix['description'] = "A prefix printed only if the record has a value for this element" builtin_params.append(param_prefix) #Add 'suffix' parameter param_suffix = {} param_suffix['name'] = "suffix" param_suffix['default'] = "" param_suffix['description'] = "A suffix printed only if the record has a value for this element" builtin_params.append(param_suffix) #Add 'separator' parameter param_separator = {} param_separator['name'] = "separator" param_separator['default'] = " " param_separator['description'] = "A separator between elements of the field" builtin_params.append(param_separator) #Add 'nbMax' parameter param_nbMax = {} param_nbMax['name'] = "nbMax" param_nbMax['default'] = "" param_nbMax['description'] = "The maximum number of values to print for this element. 
def get_output_format(code, with_attributes=False, verbose=0):
    """
    Returns the structured content of the given output format

    If 'with_attributes' is True, also returns the names and description of the output format,
    else 'attrs' is not returned in dict (it might, if it has already been loaded previously).

    If the output format corresponding to 'code' is not found, an error is
    registered and an empty structure is returned.

    The output format file is read line by line (blank lines are ignored):
      - a line ending with ':' sets the current field: the field is made of
        the words that follow the first word of the line
      - a line containing '---' adds a rule for the current field: the text
        before the last '---' is the value, the text after it the template
      - any other line containing ':' sets the default template, which is
        the text found after the first ':'

    See get_output_format_attrs() to learn more on the attributes

    {'rules': [ {'field': "980__a",
                 'value': "PREPRINT",
                 'template': "filename_a.bft",
                },
                {...}
              ],
     'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
               'description': "a description"
               'code': "fnm1",
               'content_type': "application/ms-excel"
              }
     'default':"filename_b.bft"
    }

    @param code the code of an output_format
    @param with_attributes if True, fetch the attributes (names and description) for format
    @param verbose the level of verbosity from 0 to 9 (O: silent,
                                                       5: errors,
                                                       7: errors and warnings,
                                                       9: errors and warnings, stop if error (debug mode ))
    @return structured content of output format
    """
    global format_outputs_cache

    output_format = {'rules': [], 'default': ""}
    filename = resolve_output_format_filename(code, verbose)

    if filename is None:
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN", code)],
                                        stream='error', ln=cdslang)
        register_errors(errors, 'error')
        if with_attributes:
            #Create empty attrs if asked for attributes
            output_format['attrs'] = get_output_format_attrs(code, verbose)
        return output_format

    #Get from cache whenever possible
    if filename in format_outputs_cache:
        cached_format = format_outputs_cache[filename]
        #Attributes are requested but were not loaded when the format
        #was cached: load them now
        if with_attributes and 'attrs' not in cached_format:
            cached_format['attrs'] = get_output_format_attrs(code, verbose)
        return cached_format

    try:
        if with_attributes:
            output_format['attrs'] = get_output_format_attrs(code, verbose)

        path = "%s%s%s" % (outputs_path, os.sep, filename)
        format_file = open(path)
        try:
            current_tag = ''
            for line in format_file:
                line = line.strip()
                if line == "":
                    #ignore blank lines
                    continue
                if line.endswith(":"):
                    #retrieve tag
                    #remove : spaces and eol at the end of line
                    clean_line = line.rstrip(": \n\r")
                    #the tag starts at second position
                    current_tag = "".join(clean_line.split()[1:]).strip()
                elif '---' in line:
                    words = line.split('---')
                    output_format['rules'].append({'field': current_tag,
                                                   'value': ''.join(words[:-1]),
                                                   'template': words[-1].strip(),
                                                   })
                elif ':' in line:
                    #Default case
                    output_format['default'] = line.split(':')[1].strip()
        finally:
            #Always release the file handle, even if parsing failed
            format_file.close()

    except Exception:
        #sys.exc_info() is used to retrieve the exception so that this
        #block parses with any version of Python
        error = sys.exc_info()[1]
        errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE", filename, str(error))],
                                        stream='error', ln=cdslang)
        register_errors(errors, 'error')

    #cache and return (also when the file could only be partially read)
    format_outputs_cache[filename] = output_format
    return output_format
The 'names' dict always contais 'generic', 'ln' (for long name) and 'sn' (for short names) keys. 'generic' is the default name for output format. 'ln' and 'sn' contain long and short localized names of the output format. Only the languages for which a localization exist are used. {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1", 'content_type': "application/ms-excel" } @param code the short identifier of the format @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return strucured content of output format attributes """ if code.endswith("."+format_output_extension): code = code[:-(len(format_output_extension) + 1)] attrs = {'names':{'generic':"", 'ln':{}, 'sn':{}}, 'description':'', 'code':code.upper(), 'content_type':""} filename = resolve_output_format_filename(code, verbose) if filename == None: return attrs attrs['names'] = bibformat_dblayer.get_output_format_names(code) attrs['description'] = bibformat_dblayer.get_output_format_description(code) attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code) return attrs def get_output_formats(with_attributes=False): """ Returns the list of all output format, as a dictionary with their filename as key If 'with_attributes' is True, also returns the names and description of the output formats, else 'attrs' is not returned in dicts (it might, if it has already been loaded previously). See get_output_format_attrs() to learn more on the attributes {'filename_1.bfo': {'rules': [ {'field': "980__a", 'value': "PREPRINT", 'template': "filename_a.bft", }, {...} ], 'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1" } 'default':"filename_b.bft" }, 'filename_2.bfo': {...}, ... 
def get_kb_mapping(kb, string, default=""):
    """
    Returns the value of the 'string' in the knowledge base 'kb'.

    If kb does not exist or string does not exist in kb,
    returns 'default' string value.

    Values fetched from the database are kept in 'kb_mappings_cache',
    including the None values returned for unknown strings, so that each
    (kb, string) pair is only looked up once.

    @param kb a knowledge base name
    @param string a key in a knowledge base
    @param default a default value if 'string' is not in 'kb'
    @return the value corresponding to the given string in given kb
    """
    global kb_mappings_cache

    #Precreate the cache of this kb if needed
    kb_cache = kb_mappings_cache.setdefault(kb, {})

    #The same key is used to read from and to write to the cache, so that
    #a cached value is found again whatever the type of 'string'
    if string in kb_cache:
        value = kb_cache[string]
    else:
        value = bibformat_dblayer.get_kb_mapping_value(kb, string)
        kb_cache[string] = value

    if value is None:
        return default
    return value
def get_fresh_format_template_filename(name):
    """
    Returns a new filename and name for template with given name.

    Used when writing a new template to a file, so that the name
    has no space, is unique in template directory

    The filename is built from 'name' by removing every character that
    is not a letter, a digit or a dot. If a template file with this
    filename already exists, an increasing index (2, 3, ...) is appended
    to both the filename and the returned name until the filename is free.

    Returns (unique_filename, modified_name)

    @param name a name for a format template
    @return the corresponding filename, and modified name if necessary
    """
    name = name.replace(" ", "_")
    #Remove non alphanumeric chars (except .)
    base_filename = re.sub(r"[^.0-9a-zA-Z]", "", name)
    extension = "." + format_template_extension

    filename = base_filename
    index = 1
    while os.path.exists(templates_path + os.sep + filename + extension):
        index += 1
        #Build the candidate from the cleaned name, so that characters
        #removed above are not reintroduced in the filename
        filename = base_filename + str(index)

    if index > 1:
        returned_name = (name + str(index)).replace("_", " ")
    else:
        returned_name = name.replace("_", " ")

    return (filename + extension, returned_name)
class BibFormatObject:
    """
    An object that encapsulates a record and associated methods, and that is given
    as parameter to all format elements 'format' function.
    The object is made specifically for a given formatting, i.e. it includes
    for example the language for the formatting.

    The object provides basic accessors to the record. For full access, one can get
    the record with get_record() and then use BibRecord methods on the returned object.
    """
    #The record
    record = None

    #The language in which the formatting has to be done
    lang = cdslang

    #A list of string describing the context in which the record has to be formatted.
    #It represents the words of the user request in web interface search
    search_pattern = []

    #The id of the record
    recID = 0

    #The user id of the person who will view the formatted page (if applicable)
    #This allows for example to print a "edit record" link for people
    #who have right to edit a record.
    uid = None

    def __init__(self, recID, ln=cdslang, search_pattern=None, xml_record=None, uid=None):
        """
        Creates a new bibformat object, with given record.

        You can either specify an record ID to format, or give its xml representation.
        if 'xml_record' is not None, use 'xml_record' instead of recID for the record:
        the record ID is then read from field 001 of the given record.

        'uid' allows to grant access to some functionalities on a page depending
        on the user's privileges.

        @param recID the id of a record
        @param ln the language in which the record has to be formatted
        @param search_pattern list of string representing the request used by the user in web interface (defaults to an empty list)
        @param xml_record a xml string of the record to format
        @param uid the user id of the person who will view the formatted page
        """
        if search_pattern is None:
            #A new list for each instance: a list used as default value
            #of the parameter would be shared by all the instances
            search_pattern = []

        if xml_record is not None:
            #If record is given as parameter
            self.record = create_record(xml_record)[0]
            recID = record_get_field_value(self.record, "001")

        self.lang = wash_language(ln)
        self.search_pattern = search_pattern
        self.recID = recID
        self.uid = uid

    def get_record(self):
        """
        Returns the record of this BibFormatObject instance

        The record is created from the XML of record 'recID' the first
        time it is needed.

        @return the record structure as returned by BibRecord
        """
        #Create record if necessary
        if self.record is None:
            record = create_record(record_get_xml(self.recID, 'xm'))
            self.record = record[0]

        return self.record

    def control_field(self, tag):
        """
        Returns the value of control field given by tag in record

        @param tag the marc code of a field
        @return value of field tag in record
        """
        record = self.get_record()
        if record is None:
            #Case where BibRecord could not parse object
            return ''

        p_tag = parse_tag(tag)
        return record_get_field_value(record,
                                      p_tag[0],
                                      p_tag[1],
                                      p_tag[2],
                                      p_tag[3])

    def field(self, tag):
        """
        Returns the value of the field corresponding to tag in the
        current record.

        if the value does not exist, return empty string

        @param tag the marc code of a field
        @return value of field tag in record
        """
        list_of_fields = self.fields(tag)
        if len(list_of_fields) > 0:
            return list_of_fields[0]
        return ""

    def fields(self, tag):
        """
        Returns the list of values corresponding to "tag".

        If tag has an undefined subcode (such as 999C5),
        the function returns a list of dictionaries, whose keys
        are the subcodes and the values are the values of tag.subcode.
        If the tag has a subcode, simply returns list of values
        corresponding to tag.

        @param tag the marc code of a field
        @return values of field tag in record
        """
        record = self.get_record()
        if record is None:
            #Case where BibRecord could not parse object
            return []

        p_tag = parse_tag(tag)
        if p_tag[3] != "":
            #Subcode has been defined. Simply returns list of values
            return record_get_field_values(record,
                                           p_tag[0],
                                           p_tag[1],
                                           p_tag[2],
                                           p_tag[3])

        #Subcode is undefined. Returns list of dicts.
        #However it might be the case of a control field.
        instances = record_get_field_instances(record,
                                               p_tag[0],
                                               p_tag[1],
                                               p_tag[2])
        #The first item of each instance is turned into a dict
        #(presumably its list of (subcode, value) pairs - to be confirmed
        #against BibRecord)
        return [dict(instance[0]) for instance in instances]

    def kb(self, kb, string, default=""):
        """
        Returns the value of the "string" in the knowledge base "kb".

        If kb does not exist or string does not exist in kb,
        returns 'default' string or empty string if not specified.

        @param kb a knowledge base name
        @param string the string we want to translate
        @param default a default value returned if 'string' not found in 'kb'
        @return the value found in the knowledge base, or 'default'
        """
        if string is None:
            return default

        val = get_kb_mapping(kb, string, default)
        if val is None:
            return default
        return val
def perform_request_index(ln=cdslang, warnings=None, is_admin=False):
    """
    Builds the main BibFormat admin page.

    This is the only page where the code needs to be cleaned
    when the migration kit will be removed. #TODO: remove when removing migration_kit

    When a non-empty list of warnings is given, the warning codes are
    first converted to their messages in language 'ln'.

    @param ln language
    @param warnings a list of messages to display at top of the page, that prevents writability in etc
    @param is_admin indicate if user is authorized to use BibFormat
    @return the main admin page
    """
    has_warnings = warnings is not None and len(warnings) > 0
    if has_warnings:
        coded_messages = get_msgs_for_code_list(warnings, 'warning', ln)
        #Each item is a (code, message) pair: only the message is displayed
        warnings = [coded_message[1] for coded_message in coded_messages]

    page = bibformat_templates.tmpl_admin_index(ln, warnings, is_admin)
    return page
def perform_request_format_template_show_dependencies(bft, ln=cdslang):
    """
    Builds the page listing the dependencies of a format template: the
    output formats that use it, the elements it uses, and the sorted
    list of the tags of these output formats and elements.

    @param bft the filename of the template to show
    @param ln language
    @return the page showing the dependencies of the template
    """
    template = bibformat_engine.get_format_template(filename=bft,
                                                    with_attributes=True)
    template_name = template['attrs']['name']

    outputs = get_outputs_that_use_template(bft)
    elements = get_elements_used_by_template(bft)

    #Tags of the output formats come first, then tags of the elements.
    #Duplicates are kept.
    all_tags = []
    for output in outputs:
        all_tags.extend(output['tags'])
    for element in elements:
        all_tags.extend(element['tags'])
    all_tags.sort()

    return bibformat_templates.tmpl_admin_format_template_show_dependencies(ln,
                                                                            template_name,
                                                                            bft,
                                                                            outputs,
                                                                            elements,
                                                                            all_tags)
Keep only elements that have 'search_doc_pattern' text inside description, if pattern not empty @param ln language @param search_doc_pattern a search pattern that specified which elements to display @return a brief version of the format element documentation """ #get format elements lists of attributes elements = bibformat_engine.get_format_elements(with_built_in_params=True) keys = elements.keys() keys.sort() elements = map(elements.get, keys) def filter_elem(element): """Keep element if is string representation contains all keywords of search_doc_pattern, and if its name does not start with a number (to remove 'garbage' from elements in tags table)""" if element['type'] != 'python' and \ element['attrs']['name'][0] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']: return False text = str(element).upper() #Basic text representation if search_doc_pattern != "": for word in search_doc_pattern.split(): if word.upper() != "AND" and text.find(word.upper()) == -1: return False return True elements = filter(filter_elem, elements) return bibformat_templates.tmpl_admin_format_template_show_short_doc(ln, elements) def perform_request_format_elements_documentation(ln=cdslang): """ Returns the main management console for format elements. Includes list of format elements and associated administration tools. 
def perform_request_format_element_test(bfe, ln=cdslang, param_values=None, uid=None):
    """
    Returns the page used to test the given format element.

    'param_values' is the list of values to pass to 'format'
    function of the element as parameters, in this order:
      1. the search pattern used to choose the record to format
      2. the values of the parameters defined by the element
      3. the values of the parameters built in all elements
    If 'param_values' is None, this means that the values have not
    been defined by user yet: default values are then proposed.

    @param bfe the name of the format element to show
    @param ln language
    @param param_values the list of parameters to pass to element format function
    @param uid the user id for this request
    @return the page for testing the format element
    """
    _ = gettext_set_language(ln)
    format_element = bibformat_engine.get_format_element(bfe, with_built_in_params=True)
    element_attrs = format_element['attrs']

    #Parameters defined in this element, followed by parameters common
    #to all elements of a kind
    element_params = list(element_attrs['params']) + list(element_attrs['builtin_params'])

    #Load parameter names and description
    ##
    #First value is a search pattern to choose the record
    record_label = _("Test with record:")
    param_names = [record_label] + [param['name'] for param in element_params]
    param_descriptions = [_("Enter a search query here.")] + \
                         [param['description'] for param in element_params]

    #Load parameters values
    ##
    if param_values is None:
        #First time the page is loaded
        #Propose an existing record id by default. When there is no
        #record, an empty pattern is still added, so that the values
        #remain aligned with 'param_names'
        recIDs = perform_request_search()
        if len(recIDs) > 0:
            default_pattern = "recid:%s" % recIDs[0]
        else:
            default_pattern = ""
        param_values = [default_pattern] + [param['default'] for param in element_params]

    #Execute element with parameters
    ##
    params = dict(zip(param_names, param_values))

    #Find a record corresponding to search pattern
    search_pattern = params[record_label]
    recIDs = perform_request_search(p=search_pattern)
    #The search pattern is not a parameter of the element
    del params[record_label]

    if len(recIDs) > 0:
        bfo = bibformat_engine.BibFormatObject(recIDs[0], ln, search_pattern, None, uid)
        (result, errors) = bibformat_engine.eval_format_element(format_element, bfo, params)
    else:
        #Display the error in the language of the page
        result = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_RECORD_FOUND_FOR_PATTERN", search_pattern)],
                                        stream='error', ln=ln)[0][1]

    return bibformat_templates.tmpl_admin_format_element_test(ln,
                                                              bfe,
                                                              element_attrs['description'],
                                                              param_names,
                                                              param_values,
                                                              param_descriptions,
                                                              result)
sorted_output_formats = sort_by_attr(output_formats.values(), sortby) return bibformat_templates.tmpl_admin_output_formats_management(ln, sorted_output_formats) def perform_request_output_format_show(bfo, ln=cdslang, r_fld=[], r_val=[], r_tpl=[], default="", r_upd="", args={}): """ Returns the editing tools for a given output format. The page either shows the output format from file, or from user's POST session, as we want to let him edit the rules without saving. Policy is: r_fld, r_val, rules_tpl are list of attributes of the rules. If they are empty, load from file. Else use POST. The i th value of each list is one of the attributes of rule i. Rule i is the i th rule in order of evaluation. All list have the same number of item. r_upd contains an action that has to be performed on rules. It can composed of a number (i, the rule we want to modify) and an operator : "save" to save the rules, "add" or "del". syntax: operator [number] For eg: r_upd = _("Save Changes") saves all rules (no int should be specified). For eg: r_upd = _("Add New Rule") adds a rule (no int should be specified). For eg: r_upd = _("Remove Rule") + " 5" deletes rule at position 5. The number is used only for operation delete. An action can also be in **args. We must look there for string starting with '(+|-) [number]' to increase (+) or decrease (-) a rule given by its index (number). For example "+ 5" increase priority of rule 5 (put it at fourth position). The string in **args can be followed by some garbage that looks like .x or .y, as this is returned as the coordinate of the click on the . We HAVE to use args and reason on its keys, because for of type image, iexplorer does not return the value of the tag, but only the name. 
Action is executed only if we are working from user's POST session (means we must have loaded the output format first, which is totally normal and expected behaviour) IMPORTANT: we display rules evaluation index starting at 1 in interface, but we start internally at 0 @param ln language @param bfo the filename of the output format to show @param r_fld the list of 'field' attribute for each rule @param r_val the list of 'value' attribute for each rule @param r_tpl the list of 'template' attribute for each rule @param default the default format template used by this output format @param r_upd the rule that we want to increase/decrease in order of evaluation """ output_format = bibformat_engine.get_output_format(bfo, with_attributes=True) format_templates = bibformat_engine.get_format_templates(with_attributes=True) name = output_format['attrs']['names']['generic'] rules = [] debug = "" if len(r_fld) == 0 and r_upd=="": #Retrieve rules from file rules = output_format['rules'] default = output_format['default'] else: #Retrieve rules from given lists #Transform a single rule (not considered as a list with length #1 by the templating system) into a list if not isinstance(r_fld, list): r_fld = [r_fld] r_val = [r_val] r_tpl = [r_tpl] for i in range(len(r_fld)): rule = {'field': r_fld[i], 'value': r_val[i], 'template': r_tpl[i]} rules.append(rule) #Execute action _ = gettext_set_language(ln) if r_upd.startswith(_("Remove Rule")): #Remove rule index = int(r_upd.split(" ")[-1]) -1 del rules[index] elif r_upd.startswith(_("Save Changes")): #Save update_output_format_rules(bfo, rules, default) elif r_upd.startswith(_("Add New Rule")): #Add new rule rule = {'field': "", 'value': "", 'template': ""} rules.append(rule) else: #Get the action in 'args' #The action must be constructed from string of the kind: # + 5 or - 4 or + 5.x or -4.y for button_val in args.keys():#for all elements of form not handled yet action = button_val.split(" ") if action[0] == '-' or action[0] == '+': 
index = int(action[1].split(".")[0]) -1 if action[0] == '-': #Decrease priority rule = rules[index] del rules[index] rules.insert(index + 1, rule) #debug = 'Decrease rule '+ str(index) break elif action[0] == '+': #Increase priority rule = rules[index] del rules[index] rules.insert(index - 1, rule) #debug = 'Increase rule ' + str(index) break editable = can_write_output_format(bfo) return bibformat_templates.tmpl_admin_output_format_show(ln, bfo, name, rules, default, format_templates, editable) def perform_request_output_format_show_dependencies(bfo, ln=cdslang): """ Show the dependencies of the given format. @param ln language @param bfo the filename of the output format to show """ output_format = bibformat_engine.get_output_format(code=bfo, with_attributes=True) name = output_format['attrs']['names']['generic'] format_templates = get_templates_used_by_output(bfo) return bibformat_templates.tmpl_admin_output_format_show_dependencies(ln, name, bfo, format_templates) def perform_request_output_format_show_attributes(bfo, ln=cdslang): """ Page for output format names and description attributes edition. @param ln language @param bfo filename of output format to edit @return the main page for output format attributes edition """ output_format = bibformat_engine.get_output_format(code=bfo, with_attributes=True) name = output_format['attrs']['names']['generic'] description = output_format['attrs']['description'] content_type = output_format['attrs']['content_type'] #Get translated names. Limit to long names now. 
#Translation are given in order of languages in language_list_long() names_trans = [] for lang in language_list_long(): name_trans = output_format['attrs']['names']['ln'].get(lang[0], "") names_trans.append({'lang':lang[1], 'trans':name_trans}) editable = can_write_output_format(bfo) return bibformat_templates.tmpl_admin_output_format_show_attributes(ln, name, description, content_type, bfo, names_trans, editable) def perform_request_knowledge_bases_management(ln=cdslang): """ Returns the main page for knowledge bases management. @param ln language @return the main page for knowledge bases management """ kbs = bibformat_dblayer.get_kbs() return bibformat_templates.tmpl_admin_kbs_management(ln, kbs) def perform_request_knowledge_base_show(kb_id, ln=cdslang, sortby="to"): """ Show the content of a knowledge base @param ln language @param kb a knowledge base id @param sortby the sorting criteria ('from' or 'to') @return the content of the given knowledge base """ name = bibformat_dblayer.get_kb_name(kb_id) mappings = bibformat_dblayer.get_kb_mappings(name, sortby) return bibformat_templates.tmpl_admin_kb_show(ln, kb_id, name, mappings, sortby) def perform_request_knowledge_base_show_attributes(kb_id, ln=cdslang, sortby="to"): """ Show the attributes of a knowledge base @param ln language @param kb a knowledge base id @param sortby the sorting criteria ('from' or 'to') @return the content of the given knowledge base """ name = bibformat_dblayer.get_kb_name(kb_id) description = bibformat_dblayer.get_kb_description(name) return bibformat_templates.tmpl_admin_kb_show_attributes(ln, kb_id, name, description, sortby) def perform_request_knowledge_base_show_dependencies(kb_id, ln=cdslang, sortby="to"): """ Show the dependencies of a kb @param ln language @param kb a knowledge base id @param sortby the sorting criteria ('from' or 'to') @return the dependencies of the given knowledge base """ name = bibformat_dblayer.get_kb_name(kb_id) format_elements = 
get_elements_that_use_kb(name) return bibformat_templates.tmpl_admin_kb_show_dependencies(ln, kb_id, name, sortby, format_elements) def add_format_template(): """ Adds a new format template (mainly create file with unique name) @return the filename of the created format """ (filename, name) = bibformat_engine.get_fresh_format_template_filename("Untitled") out = '%(name)s' % {'name':name} path = templates_path + os.sep + filename format = open(path, 'w') format.write(out) format.close return filename def delete_format_template(filename): """ Delete a format template given by its filename If format template is not writable, do not remove @param filename the format template filename """ if not can_write_format_template(filename): return path = templates_path + os.sep + filename os.remove(path) bibformat_engine.clear_caches() def update_format_template_code(filename, code=""): """ Saves code inside template given by filename """ format_template = bibformat_engine.get_format_template_attrs(filename) name = format_template['name'] description = format_template['description'] out = ''' %(name)s %(description)s %(code)s ''' % {'name':name, 'description':description, 'code':code} path = templates_path + os.sep + filename format = open(path, 'w') format.write(out) format.close bibformat_engine.clear_caches() def update_format_template_attributes(filename, name="", description=""): """ Saves name and description inside template given by filename. the filename must change according to name, and every output format having reference to filename must be updated. If name already exist, use fresh filename (we never overwrite other templates) amd remove old one. 
@return the filename of the modified format """ format_template = bibformat_engine.get_format_template(filename, with_attributes=True) code = format_template['code'] if format_template['attrs']['name'] != name: #name has changed, so update filename old_filename = filename old_path = templates_path + os.sep + old_filename #Remove old one os.remove(old_path) (filename, name) = bibformat_engine.get_fresh_format_template_filename(name) #Change output formats that calls this template output_formats = bibformat_engine.get_output_formats() for output_format_filename in output_formats: if can_read_output_format(output_format_filename) and can_write_output_format(output_format_filename): output_path = outputs_path + os.sep + output_format_filename format = open(output_path, 'r') output_text = format.read() format.close output_pattern = re.compile("---(\s)*" + old_filename, re.IGNORECASE) mod_output_text = output_pattern.sub("--- " + filename, output_text) if output_text != mod_output_text: format = open(output_path, 'w') format.write(mod_output_text) format.close #Write updated format template out = '''%(name)s%(description)s%(code)s''' % {'name':name, 'description':description, 'code':code} path = templates_path + os.sep + filename format = open(path, 'w') format.write(out) format.close bibformat_engine.clear_caches() return filename def add_output_format(): """ Adds a new output format (mainly create file with unique name) @return the code of the created format """ (filename, code) = bibformat_engine.get_fresh_output_format_filename("UNTLD") #Add entry in database bibformat_dblayer.add_output_format(code) bibformat_dblayer.set_output_format_name(code, "Untitled", lang="generic") bibformat_dblayer.set_output_format_content_type(code, "text/html") #Add file out = "" path = outputs_path + os.sep + filename format = open(path, 'w') format.write(out) format.close return code def delete_output_format(code): """ Delete a format template given by its code if file is not writable, 
don't remove @param code the 6 letters code of the output format to remove """ if not can_write_output_format(code): return #Remove entry from database bibformat_dblayer.remove_output_format(code) #Remove file filename = bibformat_engine.resolve_output_format_filename(code) path = outputs_path + os.sep + filename os.remove(path) bibformat_engine.clear_caches() def update_output_format_rules(code, rules=[], default=""): """ Saves rules inside output format given by code """ #Generate output format syntax #Try to group rules by field previous_field = "" out = "" for rule in rules: field = rule["field"] value = rule["value"] template = rule["template"] if previous_field != field: out += "tag %s:\n" % field out +="%(value)s --- %(template)s\n" % {'value':value, 'template':template} previous_field = field out += "default: %s" % default filename = bibformat_engine.resolve_output_format_filename(code) path = outputs_path + os.sep + filename format = open(path, 'w') format.write(out) format.close bibformat_engine.clear_caches() def update_output_format_attributes(code, name="", description="", new_code="", content_type="", names_trans=[]): """ Saves name and description inside output format given by filename. If new_code already exist, use fresh code (we never overwrite other output). 
@param description the new description @param name the new name @param code the new short code (== new bfo) of the output format @param code the code of the output format to update @param names_trans the translations in the same order as the languages from get_languages() @param content_type the new content_type of the output format @return the filename of the modified format """ bibformat_dblayer.set_output_format_description(code, description) bibformat_dblayer.set_output_format_content_type(code, content_type) bibformat_dblayer.set_output_format_name(code, name, lang="generic") i = 0 for lang in language_list_long(): if names_trans[i] != "": bibformat_dblayer.set_output_format_name(code, names_trans[i], lang[0]) i += 1 new_code = new_code.upper() if code != new_code: #If code has changed, we must update filename with a new unique code old_filename = bibformat_engine.resolve_output_format_filename(code) old_path = outputs_path + os.sep + old_filename (new_filename, new_code) = bibformat_engine.get_fresh_output_format_filename(new_code) new_path = outputs_path + os.sep + new_filename os.rename(old_path, new_path) bibformat_dblayer.change_output_format_code(code, new_code) bibformat_engine.clear_caches() return new_code def add_kb_mapping(kb_name, key, value=""): """ Adds a new mapping to given kb @param kb_name the name of the kb where to insert the new value @param key the key of the mapping @param value the value of the mapping """ bibformat_dblayer.add_kb_mapping(kb_name, key, value) def remove_kb_mapping(kb_name, key): """ Delete an existing kb mapping in kb @param kb_name the name of the kb where to insert the new value @param key the key of the mapping """ bibformat_dblayer.remove_kb_mapping(kb_name, key) def update_kb_mapping(kb_name, old_key, key, value): """ Update an existing kb mapping with key old_key with a new key and value @param kb_name the name of the kb where to insert the new value @param the key of the mapping in the kb @param key the new key 
of the mapping @param value the new value of the mapping """ bibformat_dblayer.update_kb_mapping(kb_name, old_key, key, value) def get_kb_name(kb_id): """ Returns the name of the kb given by id """ return bibformat_dblayer.get_kb_name(kb_id) def update_kb_attributes(kb_name, new_name, new_description): """ Updates given kb_name with a new name and new description @param kb_name the name of the kb to update @param new_name the new name for the kb @param new_description the new description for the kb """ bibformat_dblayer.update_kb(kb_name, new_name, new_description) def add_kb(kb_name="Untitled"): """ Adds a new kb in database, and returns its id The name of the kb will be 'Untitled#' such that it is unique. @param kb_name the name of the kb @return the id of the newly created kb """ name = kb_name i = 1 while bibformat_dblayer.kb_exists(name): name = kb_name + " " + str(i) i += 1 kb_id = bibformat_dblayer.add_kb(name, "") return kb_id def delete_kb(kb_name): """ Deletes given kb from database """ bibformat_dblayer.delete_kb(kb_name) def can_read_format_template(filename): """ Returns 0 if we have read permission on given format template, else returns other integer """ path = "%s%s%s" % (templates_path, os.sep, filename) return os.access(path, os.R_OK) def can_read_output_format(bfo): """ Returns 0 if we have read permission on given output format, else returns other integer """ filename = bibformat_engine.resolve_output_format_filename(bfo) path = "%s%s%s" % (outputs_path, os.sep, filename) return os.access(path, os.R_OK) def can_read_format_element(name): """ Returns 0 if we have read permission on given format element, else returns other integer """ filename = bibformat_engine.resolve_format_element_filename(name) path = "%s%s%s" % (elements_path, os.sep, filename) return os.access(path, os.R_OK) def can_write_format_template(bft): """ Returns 0 if we have write permission on given format template, else returns other integer """ if not 
can_read_format_template(bft): return False path = "%s%s%s" % (templates_path, os.sep, bft) return os.access(path, os.W_OK) def can_write_output_format(bfo): """ Returns 0 if we have write permission on given output format, else returns other integer """ if not can_read_output_format(bfo): return False filename = bibformat_engine.resolve_output_format_filename(bfo) path = "%s%s%s" % (outputs_path, os.sep, filename) return os.access(path, os.W_OK) def can_write_etc_bibformat_dir(): """ Returns true if we can write in etc/bibformat dir. """ path = "%s%sbibformat" % (etcdir, os.sep) return os.access(path, os.W_OK) def get_outputs_that_use_template(filename): """ Returns a list of output formats that call the given format template. The returned output formats also give their dependencies on tags. We don't return the complete output formats but some reference to them (filename + names) [ {'filename':"filename_1.bfo" 'names': {'en':"a name", 'fr': "un nom", 'generic':"a name"} 'tags': ['710__a', '920__'] }, ... 
] Returns output formats references sorted by (generic) name @param filename a format template filename """ output_formats_list = {} tags = [] output_formats = bibformat_engine.get_output_formats(with_attributes=True) for output_format in output_formats: name = output_formats[output_format]['attrs']['names']['generic'] #First look at default template, and add it if necessary if output_formats[output_format]['default'] == filename: output_formats_list[name] = {'filename':output_format, 'names':output_formats[output_format]['attrs']['names'], 'tags':[]} #Second look at each rule found = False for rule in output_formats[output_format]['rules']: if rule['template'] == filename: found = True tags.append(rule['field']) #Also build dependencies on tags #Finally add dependency on template from rule (overwrite default dependency, #which is weaker in term of tag) if found == True: output_formats_list[name] = {'filename':output_format, 'names':output_formats[output_format]['attrs']['names'], 'tags':tags} keys = output_formats_list.keys() keys.sort() return map(output_formats_list.get, keys) def get_elements_used_by_template(filename): """ Returns a list of format elements that are called by the given format template. The returned elements also give their dependencies on tags The list is returned sorted by name [ {'filename':"filename_1.py" 'name':"filename_1" 'tags': ['710__a', '920__'] }, ... 
] Returns elements sorted by name @param filename a format template filename """ format_elements = {} format_template = bibformat_engine.get_format_template(filename=filename, with_attributes=True) code = format_template['code'] format_elements_iter = bibformat_engine.pattern_tag.finditer(code) for result in format_elements_iter: function_name = result.group("function_name").lower() if function_name != None and not format_elements.has_key(function_name): filename = bibformat_engine.resolve_format_element_filename("BFE_"+function_name) if filename != None: tags = get_tags_used_by_element(filename) format_elements[function_name] = {'name':function_name.lower(), 'filename':filename, 'tags':tags} keys = format_elements.keys() keys.sort() return map(format_elements.get, keys) # Format Elements Dependencies ## def get_tags_used_by_element(filename): """ Returns a list of tags used by given format element APPROXIMATIVE RESULTS: the tag are retrieved in field(), fields() and control_field() function. If they are used computed, or saved in a variable somewhere else, they are not retrieved @TODO: There is room for improvements. For example catch call to BibRecord functions, or use of Returns tags sorted by value @param filename a format element filename """ tags = {} format_element = bibformat_engine.get_format_element(filename) if format_element == None: return [] elif format_element['type']=="field": tags = format_element['attrs']['tags'] return tags filename = bibformat_engine.resolve_format_element_filename(filename) path = elements_path + os.sep + filename format = open(path, 'r') code = format.read() format.close tags_pattern = re.compile(''' (field|fields|control_field)\s* #Function call \(\s* #Opening parenthesis [\'"]+ #Single or double quote (?P.+?) 
#Tag [\'"]+\s* #Single or double quote \) #Closing parenthesis ''', re.VERBOSE | re.MULTILINE) tags_iter = tags_pattern.finditer(code) for result in tags_iter: tags[result.group("tag")] = result.group("tag") return tags.values() def get_templates_that_use_element(name): """ Returns a list of format templates that call the given format element. The returned format templates also give their dependencies on tags. [ {'filename':"filename_1.bft" 'name': "a name" 'tags': ['710__a', '920__'] }, ... ] Returns templates sorted by name @param name a format element name """ format_templates = {} tags = [] files = os.listdir(templates_path) #Retrieve all templates for file in files: if file.endswith(format_template_extension): format_elements = get_elements_used_by_template(file) #Look for elements used in template format_elements = map(lambda x: x['name'].lower(), format_elements) try: #Look for element format_elements.index(name.lower()) #If not found, get out of "try" statement format_template = bibformat_engine.get_format_template(filename=file, with_attributes=True) template_name = format_template['attrs']['name'] format_templates[template_name] = {'name':template_name, 'filename':file} except: print name+" not found in "+str(format_elements) pass keys = format_templates.keys() keys.sort() return map(format_templates.get, keys) # Output Formats Dependencies ## def get_templates_used_by_output(code): """ Returns a list of templates used inside an output format give by its code The returned format templates also give their dependencies on elements and tags [ {'filename':"filename_1.bft" 'name': "a name" 'elements': [{'filename':"filename_1.py", 'name':"filename_1", 'tags': ['710__a', '920__'] }, ...] }, ... 
] Returns templates sorted by name """ format_templates = {} output_format = bibformat_engine.get_output_format(code, with_attributes=True) filenames = map(lambda x: x['template'], output_format['rules']) if output_format['default'] != "": filenames.append(output_format['default']) for filename in filenames: template = bibformat_engine.get_format_template(filename, with_attributes=True) name = template['attrs']['name'] elements = get_elements_used_by_template(filename) format_templates[name] = {'name':name, 'filename':filename, 'elements':elements} keys = format_templates.keys() keys.sort() return map(format_templates.get, keys) # Knowledge Bases Dependencies ## def get_elements_that_use_kb(name): """ Returns a list of elements that call given kb [ {'filename':"filename_1.py" 'name': "a name" }, ... ] Returns elements sorted by name """ format_elements = {} files = os.listdir(elements_path) #Retrieve all elements in files for filename in files: if filename.endswith(".py"): path = elements_path + os.sep + filename format = open(path, 'r') code = format.read() format.close #Search for use of kb inside code kb_pattern = re.compile(''' (bfo.kb)\s* #Function call \(\s* #Opening parenthesis [\'"]+ #Single or double quote (?P%s) #kb [\'"]+\s* #Single or double quote , #comma ''' % name, re.VERBOSE | re.MULTILINE | re.IGNORECASE) result = kb_pattern.search(code) if result != None: name = ("".join(filename.split(".")[:-1])).lower() if name.startswith("bfe_"): name = name[4:] format_elements[name] = {'filename':filename, 'name': name} keys = format_elements.keys() keys.sort() return map(format_elements.get, keys) # Validation tools ## def perform_request_format_validate(ln=cdslang, bfo=None, bft=None, bfe=None): """ Returns a page showing the status of an output format or format template or format element. This page is called from output formats management page or format template management page or format elements documentation. 
def check_output_format(code):
    """
    Returns the list of errors in the output format given by code

    The errors are the formatted errors defined in bibformat_config.py file.

    Every non-blank line of the output format file is classified as a
    'tag' line, a condition line (one containing '---'), a 'default'
    line or an unknown line, and is checked accordingly.

    @param code the 6 chars code of the output format to check
    @return a list of errors (tuples starting with an error code)
    """
    errors = []
    filename = bibformat_engine.resolve_output_format_filename(code)
    if not can_read_output_format(code):
        errors.append(("ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE", filename, ""))
        return errors

    path = outputs_path + os.sep + filename
    # Read the whole file up front so that the handle is always released,
    # even if one of the checks below raises
    output_format_file = open(path)
    try:
        lines = output_format_file.readlines()
    finally:
        output_format_file.close()

    current_tag = ''
    i = 0
    for line in lines:
        i += 1
        stripped_line = line.strip()
        if stripped_line == "":
            # ignore blank lines
            continue
        has_condition = line.find('---') != -1
        clean_line = line.rstrip("\n\r ")  # remove spaces and eol
        if stripped_line.endswith(":") or \
               (stripped_line.lower().startswith("tag") and not has_condition):
            # check tag
            if not clean_line.endswith(":"):
                # column misses at the end of line
                errors.append(("ERR_BIBFORMAT_OUTPUT_RULE_FIELD_COL", line, i))
            if not clean_line.lower().startswith("tag"):
                # tag keyword is missing
                errors.append(("ERR_BIBFORMAT_OUTPUT_TAG_MISSING", line, i))
            elif not clean_line.startswith("tag"):
                # tag was not lower case
                errors.append(("ERR_BIBFORMAT_OUTPUT_WRONG_TAG_CASE", line, i))

            clean_line = clean_line.rstrip(": ")  # remove : and spaces at the end of line
            words = clean_line.split()
            # the tag starts at second position
            current_tag = "".join(words[1:]).strip()
            if len(words) > 2:
                # We should only have 'tag' keyword and tag
                errors.append(("ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD", i))
            else:
                if len(check_tag(current_tag)) > 0:
                    # Invalid tag
                    errors.append(("ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD_tag", current_tag, i))
                if not clean_line.startswith("tag"):
                    errors.append(("ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD", i))

        elif has_condition:
            # check condition
            if current_tag == "":
                errors.append(("ERR_BIBFORMAT_OUTPUT_CONDITION_OUTSIDE_FIELD", line, i))

            words = line.split('---')
            if len(words) != 2:
                errors.append(("ERR_BIBFORMAT_INVALID_OUTPUT_CONDITION", line, i))
            template = words[-1].strip()

            path = templates_path + os.sep + template
            if not os.path.exists(path):
                errors.append(("ERR_BIBFORMAT_WRONG_OUTPUT_RULE_TEMPLATE_REF", template, i))

        elif line.find(':') != -1 or stripped_line.lower().startswith("default"):
            # check default template
            # (lines containing '---' were already caught by the branch above)
            clean_line = stripped_line
            has_colon = line.find(':') != -1
            if not has_colon:
                # column misses after default
                errors.append(("ERR_BIBFORMAT_OUTPUT_RULE_DEFAULT_COL", line, i))
            if not clean_line.lower().startswith("default"):
                # default keyword is missing
                errors.append(("ERR_BIBFORMAT_OUTPUT_DEFAULT_MISSING", line, i))
            elif not clean_line.startswith("default"):
                # default was not lower case
                errors.append(("ERR_BIBFORMAT_OUTPUT_WRONG_DEFAULT_CASE", line, i))

            if has_colon:
                default = line.split(':')[1].strip()
            else:
                # Without a colon this branch is only reached when the line
                # starts with the 'default' keyword (in any case), so the
                # template name is whatever follows that keyword
                default = clean_line[len("default"):].strip()

            path = templates_path + os.sep + default
            if not os.path.exists(path):
                errors.append(("ERR_BIBFORMAT_WRONG_OUTPUT_RULE_TEMPLATE_REF", default, i))

        else:
            # check others
            errors.append(("ERR_BIBFORMAT_WRONG_OUTPUT_LINE", line, i))

    return errors


def check_format_template(filename, checking=0):
    """
    Returns the list of errors in the format template given by its filename

    The errors are the formatted errors defined in bibformat_config.py file.

    @param filename the filename of the format template to check
    @param checking the level of checking (0:basic, >=1 extensive (time-consuming))
    @return a list of errors (tuples starting with an error code)
    """
    errors = []
    if not can_read_format_template(filename):
        # Template cannot be read
        errors.append(("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, ""))
        return errors

    template_file = open("%s%s%s" % (templates_path, os.sep, filename))
    try:
        code = template_file.read()
    finally:
        template_file.close()

    # Look for name
    if bibformat_engine.pattern_format_template_name.search(code) is None:
        errors.append(("ERR_BIBFORMAT_TEMPLATE_HAS_NO_NAME", filename))

    # Look for description
    if bibformat_engine.pattern_format_template_desc.search(code) is None:
        errors.append(("ERR_BIBFORMAT_TEMPLATE_HAS_NO_DESCRIPTION", filename))

    format_template = bibformat_engine.get_format_template(filename, with_attributes=False)
    code = format_template['code']

    # Look for calls to format elements
    # Check existence of elements and attributes used in call
    for element_match in bibformat_engine.pattern_tag.finditer(code):
        element_name = element_match.group("function_name")
        # Kept in its own variable so that 'filename' keeps designating the
        # template in the error tuples below. The resolved element file is
        # None for elements that are not backed by a file.
        # NOTE(review): presumably the messages of these error codes expect
        # the template filename -- confirm against bibformat_config.py
        element_filename = bibformat_engine.resolve_format_element_filename(element_name)
        if element_filename is None and \
               not bibformat_dblayer.tag_exists_for_name(element_name):
            # Element is defined neither in files nor in database
            errors.append(("ERR_BIBFORMAT_TEMPLATE_CALLS_UNDEFINED_ELEM", filename, element_name))
            continue

        format_element = bibformat_engine.get_format_element(element_name, with_built_in_params=True)
        if format_element is None:
            # Element cannot be loaded
            if not can_read_format_element(element_name):
                errors.append(("ERR_BIBFORMAT_TEMPLATE_CALLS_UNREADABLE_ELEM", filename, element_name))
            else:
                errors.append(("ERR_BIBFORMAT_TEMPLATE_CALLS_UNLOADABLE_ELEM", element_name, filename))
            continue

        # Are the parameters used defined in element?
        all_params = {}
        # Built lazily, once per element, on the first parameter found
        allowed_params = None
        for param_match in bibformat_engine.pattern_function_params.finditer(element_match.group()):
            param = param_match.group("param")
            all_params[param] = param_match.group("value")
            if allowed_params is None:
                attrs = format_element['attrs']
                # Built-in params, then params defined in element
                allowed_params = [allowed_param['name'] for allowed_param in attrs['builtin_params']]
                allowed_params.extend([allowed_param['name'] for allowed_param in attrs['params']])
            if param not in allowed_params:
                errors.append(("ERR_BIBFORMAT_TEMPLATE_WRONG_ELEM_ARG", element_name, param, filename))

        # The following code is too much time consuming. Only do where really requested
        if checking > 0:
            # Try to evaluate, with any object and pattern
            recIDs = perform_request_search()
            if len(recIDs) > 0:
                recID = recIDs[0]
                bfo = bibformat_engine.BibFormatObject(recID, search_pattern="Test")
                (result, errors_) = bibformat_engine.eval_format_element(format_element, bfo, all_params, verbose=7)
                errors.extend(errors_)

    return errors


def check_format_element(name):
    """
    Returns the list of errors in the format element given by its name

    The errors are the formatted errors defined in bibformat_config.py file.

    An element resolved to a file is imported and, if at least one record
    is returned by perform_request_search(), evaluated against the first
    one. An element that only exists in the database is accepted as is.

    @param name the name of the format element to check
    @return a list of errors (tuples starting with an error code)
    """
    import sys  # local import: only needed to fetch the caught exception

    errors = []
    filename = bibformat_engine.resolve_format_element_filename(name)
    if filename is not None:
        # Element can be found in files
        if can_read_format_element(name):
            # Try to load
            try:
                module_name = filename
                if module_name.endswith(".py"):
                    module_name = module_name[:-3]

                module = __import__("invenio.bibformat_elements." + module_name)
                # Load check only: raises if the element module cannot be
                # reached or defines no 'format' attribute
                getattr(module.bibformat_elements.__dict__[module_name], "format")

                # Try to evaluate, with any object and pattern
                recIDs = perform_request_search()
                if len(recIDs) > 0:
                    recID = recIDs[0]
                    bfo = bibformat_engine.BibFormatObject(recID, search_pattern="Test")
                    element = bibformat_engine.get_format_element(name)
                    (result, errors_) = bibformat_engine.eval_format_element(element, bfo, verbose=7)
                    errors.extend(errors_)
            except Exception:
                # sys.exc_info() instead of 'except Exception, e' /
                # 'except Exception as e': valid on every Python version
                e = sys.exc_info()[1]
                errors.append(("ERR_BIBFORMAT_IN_FORMAT_ELEMENT", name, e))
        else:
            errors.append(("ERR_BIBFORMAT_CANNOT_READ_ELEMENT_FILE", filename, ""))
    elif bibformat_dblayer.tag_exists_for_name(name):
        # Element can be found in database: nothing more to check
        pass
    else:
        errors.append(("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", name))

    return errors


def check_tag(tag):
    """
    Checks the validity of a tag

    Placeholder: no validation is implemented yet, so every tag is
    reported as valid.

    @param tag the tag to check
    @return a list of errors (currently always empty)
    """
    errors = []
    return errors


def perform_request_dreamweaver_floater():
    """
    Returns a floater for Dreamweaver with all Format Elements available.

    @return the output of bibformat_templates.tmpl_dreamweaver_floater for
            the format elements sorted by key
    """
    # get format elements lists of attributes
    elements = bibformat_engine.get_format_elements(with_built_in_params=True)

    # list() + sort() gives a sorted list whether keys() returns a list or a view
    keys = list(elements.keys())
    keys.sort()

    digits = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

    def filter_elem(element):
        """Keep element unless it is not of type 'python' and its name
        starts with a number (to remove 'garbage' from elements in
        tags table)"""
        if element['type'] == 'python':
            return True
        # [:1] instead of [0]: an empty name is kept rather than raising
        return element['attrs']['name'][:1] not in digits

    elements = [elements[key] for key in keys if filter_elem(elements[key])]

    return bibformat_templates.tmpl_dreamweaver_floater(cdslang, elements)