diff --git a/modules/bibformat/lib/Makefile.am b/modules/bibformat/lib/Makefile.am index 839e2ad42..4e49bbf73 100644 --- a/modules/bibformat/lib/Makefile.am +++ b/modules/bibformat/lib/Makefile.am @@ -1,33 +1,33 @@ ## $Id$ ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. SUBDIRS = common core elements pylibdir = $(libdir)/python/invenio pylib_DATA = bibformat_config.py bibformat_templates.py \ bibformatadminlib.py bibformat_engine.py bibformat_dblayer.py \ bibformat_utils.py bibformat_migration_kit_dblayer.py bibformat_migration_kit.py \ bibformat_migration_kit_templates.py bibformat.py \ bibformat_migration_kit_assistant_lib.py \ - bibformatadmin_regression_tests.py + bibformatadmin_regression_tests.py bibformat_engine_tests.py EXTRA_DIST = $(pylib_DATA) CLEANFILES = *~ *.tmp *.pyc diff --git a/modules/bibformat/lib/bibformat.py b/modules/bibformat/lib/bibformat.py index 8d613e78f..8aa360d4a 100644 --- a/modules/bibformat/lib/bibformat.py +++ b/modules/bibformat/lib/bibformat.py @@ -1,203 +1,260 @@ # -*- coding: utf-8 -*- ## $Id$ ## Bibformat. Format records using specified format. ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005 CERN. 
## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +""" +Format records using specified format. + +API functions: format_record, format_records, create_excel, get_output_format_content_type + +Used to wrap the BibFormat engine and associated functions. This is also where +special formatting of multiple records (that the engine does not handle, as it works +on a single record basis) should be put, with name create_*. + +SEE: bibformat_utils.py + +FIXME: currently copies record_exists() code from search engine. Refactor later. +""" + import zlib -from invenio import bibformat_dblayer +from invenio import bibformat_dblayer +from invenio import bibformat_engine +from invenio import bibformat_utils from invenio.config import cdslang +from invenio.bibformat_config import use_old_bibformat # Functions to format a single record ## -def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=None, xml_record=None, uid=None): +def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=[], xml_record=None, uid=None): """ Formats a record given output format. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. 
The record to be formatted can be specified with its ID (with 'recID' parameter) or given as XML representation(with 'xml_record' parameter). If both are specified 'recID' is ignored. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. Typically use webuser.getUid(req). This uid has sense only in the case of on-the-fly formatting. @param recID the ID of record to format @param of an output format code (or short identifier for the output format) @param ln the language to use to format the record @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) - @param search_pattern the context in which this record was asked to be formatted (User request in web interface) + @param search_pattern list of strings representing the user request in web interface @param xml_record an xml string represention of the record to format @param uid the user id of the person who will view the formatted page (if applicable) @return formatted record """ + ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### + if use_old_bibformat: + return bibformat_engine.call_old_bibformat(recID, format=of) + ############################# END ################################## + return bibformat_engine.format_record(recID=recID, of=of, ln=ln, verbose=verbose, search_pattern=search_pattern, xml_record=xml_record, uid=uid) -def get_xml(recID, format='xm', decompress=zlib.decompress): +def record_get_xml(recID, format='xm', decompress=zlib.decompress): """ Returns an XML string of the record given by recID. The function builds the XML directly from the database, without using the standard formatting process. 'format' allows to define the flavour of XML: - 'xm' for standard XML - 'marcxml' for MARC XML - 'oai_dc' for OAI Dublin Core - 'xd' for XML Dublin Core If record does not exist, returns empty string. 
@param recID the id of the record to retrieve @return the xml string of the record """ - from invenio import bibformat_utils - return bibformat_utils.get_xml(recID=recID, format=format) + return bibformat_utils.record_get_xml(recID=recID, format=format) # Helper functions to do complex formatting of multiple records +# +# You should not modify format_records when adding a complex +# formatting of multiple records, but add a create_* method +# that relies on format_records to do the formatting. ## def format_records(recIDs, of, ln=cdslang, verbose=0, search_pattern=None, xml_records=None, uid=None, prefix=None, separator=None, suffix=None, req=None): """ Returns a list of formatted records given by a list of record IDs or a list of records as xml. Adds a prefix before each record, a suffix after each record, plus a separator between records. You can either specify a list of record IDs to format, or a list of xml records, but not both (if both are specified recIDs is ignored). 'separator' is a function that returns a string as separator between records. The function must take an integer as unique parameter, which is the index in recIDs (or xml_records) of the record that has just been formatted. For example separator(i) must return the separator between recID[i] and recID[i+1]. Alternatively separator can be a single string, which will be used to separate all formatted records. 'req' is an optional parameter on which the result of the function are printed lively (prints records after records) if it is given. 
This function takes the same parameters as 'format_record' except for: @param recIDs a list of record IDs @param xml_records a list of xml string representions of the records to format @param header a string printed before all formatted records @param separator either a string or a function that returns string to separate formatted records @param req an optional request object where to print records """ formatted_records = '' - + #Fill one of the lists with Nones if xml_records != None: recIDs = map(lambda x:None, xml_records) else: xml_records = map(lambda x:None, recIDs) total_rec = len(recIDs) last_iteration = False for i in range(total_rec): if i == total_rec - 1: last_iteration = True #Print prefix if prefix != None: if isinstance(prefix, str): formatted_records += prefix if req != None: req.write(prefix) else: string_prefix = prefix(i) formatted_records += string_prefix if req != None: req.write(string_prefix) #Print formatted record formatted_record = format_record(recIDs[i], of, ln, verbose, search_pattern, xml_records[i], uid) formatted_records += formatted_record if req != None: req.write(formatted_record) #Print suffix if suffix != None: if isinstance(suffix, str): formatted_records += suffix if req != None: req.write(suffix) else: string_suffix = suffix(i) formatted_records += string_suffix if req != None: req.write(string_suffix) #Print separator if needed if separator != None and not last_iteration: if isinstance(separator, str): formatted_records += separator if req != None: req.write(separator) else: string_separator = separator(i) formatted_records += string_separator if req != None: req.write(string_separator) return formatted_records -def create_Excel(recIDs): +def create_excel(recIDs, req=None, ln=cdslang): """ - Returns an Excel readable format containing the given recIDs + Returns an Excel readable format containing the given recIDs. + If 'req' is given, also prints the output in 'req' while individual + records are being formatted. 
+ + This method shows how to create a custom formatting of multiple + records. + The excel format is a basic HTML table that most spreadsheets + applications can parse. @param recIDs a list of record IDs @return a string in Excel format """ - return "" + # Prepare the column headers to display in the Excel file + column_headers_list = ['Title', + 'Authors', + 'Addresses', + 'Affiliation', + 'Date', + 'Publisher', + 'Place', + 'Abstract', + 'Keywords', + 'Notes'] + + # Prepare Content + column_headers = ''.join(column_headers_list) + '' + column_headers = '\n'+ '' + footer = '
' + column_headers + '
' + + #Apply content_type and print column headers + if req != None: + req.content_type = get_output_format_content_type('excel') + req.headers_out["Content-Disposition"] = "inline; filename=%s" % 'results.xls' + req.send_http_header() + req.write(column_headers) + + #Format the records + excel_formatted_records = format_records(recIDs, 'excel', ln=cdslang, + separator='\n', req=req) + if req != None: + req.write(footer) + + return column_headers + excel_formatted_records + footer # Utility functions ## def get_output_format_content_type(of): """ Returns the content type (eg. 'text/html' or 'application/ms-excel') \ of the given output format. @param of the code of output format for which we want to get the content type """ content_type = bibformat_dblayer.get_output_format_content_type(of) if content_type == '': content_type = 'text/html' return content_type - -#Import at the end to avoid recursive import -#problems due to call to search_engine in -#bibformat_engine for backward compatibility -from invenio import bibformat_engine diff --git a/modules/bibformat/lib/bibformat_config.py b/modules/bibformat/lib/bibformat_config.py index 4d31dd4a6..8ef099176 100644 --- a/modules/bibformat/lib/bibformat_config.py +++ b/modules/bibformat/lib/bibformat_config.py @@ -1,97 +1,91 @@ # -*- coding: utf-8 -*- # ## $Id$ ## ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. __lastupdated__ = """$Date$""" import os from invenio.config import etcdir, pylibdir -#Debug mode 0 is silent: no error message or warnings. Don't stop formatting -#Debug mode 1 is verbose: print error messages and warnings. -#debug = 1 - -#True if old php format written in EL must be used by bibreformat. -#False if new python format must be used by bibreformat. -use_php_el_formats = False - -#The name of the function to call inside elements -#elements_function_name = "format" +#True if old php format written in EL must be used by Invenio. +#False if new python format must be used. If set to 'False' but +#new format cannot be found, old format will be used. +use_old_bibformat = False #Paths to main formats directories templates_path = "%s%sbibformat%sformat_templates" % (etcdir, os.sep, os.sep) elements_import_path = "invenio.bibformat_elements" elements_path = "%s%sinvenio%sbibformat_elements" % (pylibdir, os.sep, os.sep) outputs_path = "%s%sbibformat%soutput_formats" % (etcdir, os.sep, os.sep) #File extensions of formats format_template_extension = "bft" format_output_extension = "bfo" cfg_bibformat_error_messages = \ { 'ERR_BIBFORMAT_INVALID_TAG' : '%s is an invalid tag.', 'ERR_BIBFORMAT_NO_TEMPLATE_FOUND' : 'No template could be found for output format %s.', 'ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME' : 'Could not find format element corresponding to %s.', 'ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME ' : 'Could not find output format corresponding to %s.', 'ERR_BIBFORMAT_CANNOT_RESOLVE_TEMPLATE_FILE' : 'Could not find format template corresponding to %s.', 'ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND' : 'Format element %s could not be found.', 'ERR_BIBFORMAT_BAD_BFO_RECORD' : 'Could not initialize new BibFormatObject with record id %s.', 
'ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED' : 'Could not find a fresh name for output format %s.', 'ERR_BIBFORMAT_KB_ID_UNKNOWN' : 'Knowledge base with id %s is unknown.', 'ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN' : 'Output format with code %s could not be found.', 'ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE' : 'Format template %s cannot not be read. %s', 'ERR_BIBFORMAT_CANNOT_WRITE_TEMPLATE_FILE' : 'BibFormat could not write to format template %s. %s', 'ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE' : 'Output format %s cannot not be read. %s', 'ERR_BIBFORMAT_CANNOT_WRITE_OUTPUT_FILE' : 'BibFormat could not write to output format %s. %s', 'ERR_BIBFORMAT_EVALUATING_ELEMENT' : 'Error when evaluating format element %s with parameters. %s', 'ERR_BIBFORMAT_CANNOT_READ_ELEMENT_FILE' : 'Output format %s cannot not be read. %s', 'ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD' : 'Should be "tag field_number:" at line %s.', 'ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD_TAG' : 'Invalid tag "%s" at line %s.', 'ERR_BIBFORMAT_OUTPUT_CONDITION_OUTSIDE_FIELD': 'Condition "%s" is outside a tag specification at line %s.', 'ERR_BIBFORMAT_INVALID_OUTPUT_CONDITION' : 'Condition "%s" can only have a single separator --- at line %s.', 'ERR_BIBFORMAT_WRONG_OUTPUT_RULE_TEMPLATE_REF': 'Template "%s" does not exist at line %s.', 'ERR_BIBFORMAT_WRONG_OUTPUT_LINE' : 'Line %s could not be understood at line %s.', 'ERR_BIBFORMAT_OUTPUT_WRONG_TAG_CASE' : '"tag" must be lowercase in "%s" at line %s.', 'ERR_BIBFORMAT_OUTPUT_RULE_FIELD_COL' : 'Tag specification "%s" must end with column ":" at line %s.', 'ERR_BIBFORMAT_OUTPUT_TAG_MISSING' : 'Tag specification "%s" must start with "tag" at line %s.', 'ERR_BIBFORMAT_OUTPUT_WRONG_DEFAULT_CASE' : '"default" keyword must be lowercase in "%s" at line %s', 'ERR_BIBFORMAT_OUTPUT_RULE_DEFAULT_COL' : 'Missing column ":" after "default" in "%s" at line %s.', 'ERR_BIBFORMAT_OUTPUT_DEFAULT_MISSING' : 'Default template specification "%s" must start with "default :" at line %s.', 
'ERR_BIBFORMAT_VALIDATE_NO_FORMAT' : 'No format specified for validation. Please specify one.', 'ERR_BIBFORMAT_TEMPLATE_HAS_NO_NAME' : 'Could not find a name specified in tag "" inside format template %s.', 'ERR_BIBFORMAT_TEMPLATE_HAS_NO_DESCRIPTION' : 'Could not find a description specified in tag "" inside format template %s.', 'ERR_BIBFORMAT_TEMPLATE_CALLS_UNREADABLE_ELEM': 'Format template %s calls unreadable element "%s". Check element file permissions.', 'ERR_BIBFORMAT_TEMPLATE_CALLS_UNLOADABLE_ELEM': 'Cannot load element "%s" in template %s. Check element code.', 'ERR_BIBFORMAT_TEMPLATE_CALLS_UNDEFINED_ELEM' : 'Format template %s calls undefined element "%s".', 'ERR_BIBFORMAT_TEMPLATE_WRONG_ELEM_ARG' : 'Format element %s uses unknown parameter "%s" in format template %s.', 'ERR_BIBFORMAT_IN_FORMAT_ELEMENT' : 'Error in format element %s. %s', 'ERR_BIBFORMAT_NO_RECORD_FOUND_FOR_PATTERN' : 'No Record Found for %s.', 'ERR_BIBFORMAT_NBMAX_NOT_INT' : '"nbMax" parameter for %s must be an "int".' } cfg_bibformat_warning_messages = \ { 'WRN_BIBFORMAT_OUTPUT_FORMAT_NAME_TOO_LONG' : 'Name %s is too long for output format %s in language %s. Truncated to first 256 characters.', 'WRN_BIBFORMAT_KB_NAME_UNKNOWN' : 'Cannot find knowledge base named %s.', 'WRN_BIBFORMAT_KB_MAPPING_UNKNOWN' : 'Cannot find a mapping with key %s in knowledge base %s.', 'WRN_BIBFORMAT_CANNOT_WRITE_IN_ETC_BIBFORMAT' : 'Cannot write in etc/bibformat dir of your Invenio installation. Check directory permission.', 'WRN_BIBFORMAT_CANNOT_WRITE_MIGRATION_STATUS' : 'Cannot write file migration_status.txt in etc/bibformat dir of your Invenio installation. Check file permission.', 'WRN_BIBFORMAT_CANNOT_EXECUTE_REQUEST' : 'You request could not be executed.' 
} diff --git a/modules/bibformat/lib/bibformat_engine.py b/modules/bibformat/lib/bibformat_engine.py index 9f80e3cb1..aca307abc 100644 --- a/modules/bibformat/lib/bibformat_engine.py +++ b/modules/bibformat/lib/bibformat_engine.py @@ -1,1587 +1,1602 @@ # -*- coding: utf-8 -*- ## $Id$ ## Bibformt engine. Format XML Marc record using specified format. ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +""" +Formats a single XML Marc record using specified format. +There is no API for the engine. Instead use bibformat.py. 
+ +SEE: bibformat.py, bibformat_utils.py +""" import re import sys import os import inspect import traceback +import zlib from invenio.errorlib import register_errors, get_msgs_for_code_list from invenio.config import * from invenio.bibrecord import create_record, record_get_field_instances, record_get_field_value, record_get_field_values from invenio.dbquery import run_sql from invenio.messages import language_list_long, wash_language from invenio import bibformat_dblayer from invenio.bibformat_config import format_template_extension, format_output_extension, templates_path, elements_path, outputs_path, elements_import_path +from bibformat_utils import record_get_xml __lastupdated__ = """$Date$""" #Cache for data we have allready read and parsed format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} kb_mappings_cache = {} cdslangs = language_list_long() #Regular expression for finding ... tag in format templates pattern_lang = re.compile(r''' #closing start tag (?P.*?) #anything but the next group (greedy) () #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Builds regular expression for finding each known language in tags ln_pattern_text = r"<(" for lang in cdslangs: ln_pattern_text += lang[0] +r"|" ln_pattern_text = ln_pattern_text.rstrip(r"|") ln_pattern_text += r")>(.*?)" ln_pattern = re.compile(ln_pattern_text) #Regular expression for finding tag in format templates pattern_format_template_name = re.compile(r''' #closing start tag (?P.*?) #name value. any char that is not end tag ()(\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Regular expression for finding tag in format templates pattern_format_template_desc = re.compile(r''' #closing start tag (?P.*?) #description value. any char that is not end tag (\n)? 
#end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Regular expression for finding tags in format templates pattern_tag = re.compile(r''' [^/\s]+) #any char but a space or slash \s* #any number of spaces (?P(\s* #params here (?P([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s) =\s* #equality: = followed by any number of spaces (?P[\'"]) #one of the separators (?P.*?) #param value: any chars that is not a separator like previous one (?P=sep) #same separator as starting one )*) #many params \s* #any number of spaces (/)?> #end of the tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) #Regular expression for finding params inside tags in format templates pattern_function_params = re.compile(''' (?P([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s) =\s* # Equality: = followed by any number of spaces (?P[\'"]) # One of the separators (?P.*?) # Param value: any chars that is not a separator like previous one (?P=sep) # Same separator as starting one ''', re.VERBOSE | re.DOTALL ) #Regular expression for finding format elements "params" attributes (defined by @param) pattern_format_element_params = re.compile(''' @param\s* # Begins with @param keyword followed by space(s) (?P[^\s=]*)\s* # A single keyword, and then space(s) #(=\s*(?P[\'"]) # Equality, space(s) and then one of the separators #(?P.*?) # Default value: any chars that is not a separator like previous one #(?P=sep) # Same separator as starting one #)?\s* # Default value for param is optional. 
Followed by space(s) (?P.*) # Any text that is not end of line (thanks to MULTILINE parameter) ''', re.VERBOSE | re.MULTILINE) #Regular expression for finding format elements "see also" attribute (defined by @see) pattern_format_element_seealso = re.compile('''@see\s*(?P.*)''', re.VERBOSE | re.MULTILINE) #Regular expression for finding 2 expressions in quotes, separated by comma (as in template("1st","2nd") ) #Used when parsing output formats ## pattern_parse_tuple_in_quotes = re.compile(''' ## (?P[\'"]) ## (?P.*) ## (?P=sep1) ## \s*,\s* ## (?P[\'"]) ## (?P.*) ## (?P=sep2) ## ''', re.VERBOSE | re.MULTILINE) -def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=None, xml_record=None, uid=None): +def call_old_bibformat(recID, format="HD"): + """ + FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE + Calls BibFormat for the record RECID in the desired output format FORMAT. + + Note: this functions always try to return HTML, so when + bibformat returns XML with embedded HTML format inside the tag + FMT $g, as is suitable for prestoring output formats, we + perform un-XML-izing here in order to return HTML body only. 
+ """ + # look for formatted notice existence: + query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, of) + res = run_sql(query, None, 1) + if res: + # record 'recID' is formatted in 'format', so print it + decompress = zlib.decompress + return "%s" % decompress(res[0][0]) + else: + # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: + out = "" + pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % bindir, "otype=%s" % format], 'rw') + #pipe_input.write(print_record(recID, "xm")) + pipe_input.write(record_get_xml(recID, "xm")) + pipe_input.close() + bibformat_output = pipe_output.read() + pipe_output.close() + pipe_error.close() + if bibformat_output.startswith(""): + dom = minidom.parseString(bibformat_output) + for e in dom.getElementsByTagName('subfield'): + if e.getAttribute('code') == 'g': + for t in e.childNodes: + out += t.data.encode('utf-8') + else: + out = bibformat_output + return out + +def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=[], xml_record=None, uid=None): """ Formats a record given output format. Main entry function of bibformat engine. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. You can either specify an record ID to format, or give its xml representation. if 'xml_record' != None, then use it instead of recID. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. 
@param recID the ID of record to format @param of an output format code (or short identifier for the output format) @param ln the language to use to format the record @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) - @param search_pattern the context in which this record was asked to be formatted (User request in web interface) + @param search_pattern list of strings representing the user request in web interface @param xml_record an xml string representing the record to format @param uid the user id of the person who will view the formatted page @return formatted record """ errors_ = [] - - #Test record existence - if xml_record == None and record_exists(recID) == 0: - #Record does not exist - error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_RECORD_FOUND_FOR_PATTERN", "recid:%s" % recID)], - file='error', ln=cdslang) - errors_.append(error) - if verbose == 0: - register_errors(error, 'error') - return ("", errors_) + # Temporary workflow (during migration of formats): + # Call new BibFormat + # But if format not found for new BibFormat, then call old BibFormat #Create a BibFormat Object to give that contain record and context bfo = BibFormatObject(recID, ln, search_pattern, xml_record, uid) - #Find out which format template to use based on record and output format. 
template = decide_format_template(bfo, of) - if template == None: - + if template == None: ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### - - # look for detailed format existence: - query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, of) - res = run_sql(query, None, 1) - if res: - # record 'recID' is formatted in 'format', so print it - import zlib - decompress = zlib.decompress - return "%s" % decompress(res[0][0]) - else: - from invenio.search_engine import call_bibformat - # record 'recID' is not formatted in 'format', so try to call BibFormat on the fly or use default format: - out_record_in_format = call_bibformat(recID, of) - if out_record_in_format: - return out_record_in_format - + # template not found in new BibFormat. Call old one + return call_old_bibformat(recID, format=of) ############################# END ################################## - - - + error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_TEMPLATE_FOUND", of)], file='error', ln=cdslang) errors_.append(error) if verbose == 0: register_errors(error, 'error') elif verbose > 5: return error[0][1] return "" #Format with template (out, errors) = format_with_format_template(template, bfo, verbose) errors_.extend(errors) return out def decide_format_template(bfo, of): """ Returns the format template name that should be used for formatting given output format and BibFormatObject. Look at of rules, and take the first matching one. 
If no rule matches, returns None To match we ignore lettercase and spaces before and after value of rule and value of record @param bfo a BibFormatObject @param of the code of the output format to use """ output_format = get_output_format(of) for rule in output_format['rules']: value = bfo.field(rule['field']).strip()#Remove spaces pattern = rule['value'].strip() #Remove spaces if re.match(pattern, value, re.IGNORECASE) != None: return rule['template'] template = output_format['default'] if template != '': return template else: return None def format_with_format_template(format_template_filename, bfo, verbose=0, format_template_code=None): """ Format a record given a format template. Also returns errors Returns a formatted version of the record represented by bfo, in the language specified in bfo, and with the specified format template. Parameter format_template_filename will be ignored if format_template_code is provided. This allows to preview format code without having to save file on disk @param format_template_filename the dilename of a format template @param bfo the object containing parameters for the current formatting @param format_template_code if not empty, use code as template instead of reading format_template_filename (used for previews) @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (formatted text, errors) """ errors_ = [] if format_template_code != None: format_content = str(format_template_code) else: format_content = get_format_template(format_template_filename)['code'] localized_format = filter_languages(format_content, bfo.lang) (evaluated_format, errors) = eval_format_template_elements(localized_format, bfo, verbose) errors_ = errors return (evaluated_format, errors) def eval_format_template_elements(format_template, bfo, verbose=0): """ Evalutes the format elements of the given template and replace each element with its value. 
def eval_format_element(format_element, bfo, parameters={}, verbose=0):
    """
    Returns the result of the evaluation of the given format element,
    with given BibFormatObject and parameters. Also returns the errors
    of the evaluation.

    Three cases are handled:
      a) element of type "python": its 'code' function is called
      b) element of type "field": the values of its tags are printed
      c) anything else (including None): the element is unknown

    'parameters' is only read, never modified.

    @param format_element a format element structure as returned by get_format_element
    @param bfo a BibFormatObject used for formatting
    @param parameters a dict of parameters to be used for formatting. Key is parameter and value is value of parameter
    @param verbose the level of verbosity from 0 to 9 (0: silent,
                                                       5: errors,
                                                       7: errors and warnings,
                                                       9: errors and warnings, stop if error (debug mode))
    @return tuple (result, errors)
    """
    errors = []

    #Load special values given as parameters
    prefix = parameters.get('prefix', "")
    suffix = parameters.get('suffix', "")
    default_value = parameters.get('default', "")

    if format_element is not None and format_element['type'] == "python":
        #a) We found an element with the tag name, of type "python"

        #Prepare the keyword arguments of the 'format' function of the element:
        #declared parameters take the given value, or else their default
        params = {}
        for param in format_element['attrs']['params']:
            name = param['name']
            params[name] = parameters.get(name, param['default'])

        #Add BibFormatObject
        params['bfo'] = bfo

        #Execute function exactly once, inside the guard, so that a failing
        #element is reported instead of aborting the whole formatting
        function = format_element['code']
        try:
            output_text = function(**params)
        except Exception:
            (exc_type, exc_value, tb) = sys.exc_info()
            output_text = ""
            name = format_element['attrs']['name']
            error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT", name, str(params))
            errors.append(error)
            if verbose == 0:
                register_errors(errors, 'error')
            elif verbose >= 5:
                error_string = get_msgs_for_code_list(error, file='error', ln=cdslang)
                stack = traceback.format_exception(Exception, exc_value, tb, limit=None)
                #NOTE(review): the surrounding literals look like they lost some markup - confirm against the repository
                output_text = '' + error_string[0][1] + "".join(stack) + ' '

        if output_text is None:
            output_text = ""
        else:
            output_text = str(output_text)

        #Add prefix and suffix only if the evaluation of element is not empty
        if output_text.strip() != "":
            output_text = prefix + output_text + suffix

        #Add the default value if output_text is empty
        if output_text == "":
            output_text = default_value

        return (output_text, errors)

    elif format_element is not None and format_element['type'] == "field":
        #b) The element is not defined in a file, but in the table "tag"

        #Load special values given as parameters
        separator = parameters.get('separator', "")
        nbMax = parameters.get('nbMax', "")

        #Get the values of the fields tags that have to be printed
        output_text = []
        for tag in format_element['attrs']['tags']:
            values = bfo.fields(tag)
            if len(values) > 0 and isinstance(values[0], dict):
                #Flatten dicts to their values only
                for subfields in values:
                    output_text.extend(subfields.values())
            else:
                output_text.extend(values)

        if nbMax != "":
            try:
                nbMax = int(nbMax)
                output_text = output_text[:nbMax]
            except (ValueError, TypeError):
                name = format_element['attrs']['name']
                error = ("ERR_BIBFORMAT_NBMAX_NOT_INT", name)
                errors.append(error)
                if verbose < 5:
                    register_errors(error, 'error')
                else:
                    error_string = get_msgs_for_code_list(error, file='error', ln=cdslang)
                    #list.append() returns None: do not rebind output_text to its result
                    output_text.append(error_string[0][1])

        #Add prefix and suffix only if the evaluation of element is not empty.
        #If evaluation is empty, return default value
        if ("".join(output_text)).strip() != "":
            return (prefix + separator.join(output_text) + suffix, errors)
        else:
            return (default_value, errors)

    else:
        #c) Element is unknown
        error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", format_element)],
                                       file='error', ln=cdslang)
        errors.append(error)
        if verbose < 5:
            register_errors(error, 'error')
            return ("", errors)
        else:
            if verbose >= 9:
                sys.exit(error[0][1])
            #NOTE(review): the surrounding literals look like they lost some markup - confirm against the repository
            return ('' + error[0][1] + '', errors)
def filter_languages(format_template, ln='en'):
    """
    Filters out of the template the localized parts that do not
    correspond to the specified language.

    Each span matched by 'pattern_lang' is replaced by the content of
    its inner tag (as matched by 'ln_pattern') for language 'ln'. When the
    span has no tag for 'ln', 'cdslang' is used instead for that span.

    @param format_template the format template code
    @param ln the language that is NOT filtered out from the template
    @return the format template with unnecessary languages filtered out
    """
    def search_lang_tag(match):
        """
        Returns the content of the matched language container reduced to
        the wanted language.

        @param match a match object corresponding to the special tag that must be interpreted
        """
        lang_tag_content = match.group("langs")

        #Decide which language to keep for this span: 'ln' if present,
        #else fall back on cdslang until the end of this replacement
        wanted_lang = ln
        #NOTE(review): this pattern only anchors on the opening tag as it appears here - confirm against the repository
        opening_tag = re.compile(r"<"+wanted_lang+"\s*>(.*?)")
        if opening_tag.search(lang_tag_content) is None:
            wanted_lang = cdslang

        def clean_language_tag(lang_match):
            """
            Returns tag text content if tag language of match is the
            wanted language, else an empty string.

            @param lang_match a match object corresponding to one localized tag
            """
            if lang_match.group(1) != wanted_lang:
                return ""
            return lang_match.group(2)

        return ln_pattern.sub(clean_language_tag, lang_tag_content)

    return pattern_lang.sub(search_lang_tag, format_template)
def parse_tag(tag):
    """
    Parses a marc code and decomposes it in a table with:
    0-tag 1-indicator1 2-indicator2 3-subfield

    Spaces, '$' and '.' are removed first, so only the position of the
    remaining characters matters:
      - the first 3 chars always correspond to tag
      - 4 chars: tag followed by subfield
      - 5 chars: tag followed by both indicators
      - 6 chars: tag followed by both indicators and subfield
    An indicator given as underscore '_' means "No indicator" and is
    returned as ''. Any other length only yields the tag.

    The wildcard '%' is not interpreted: it is returned as is.

    THE FUNCTION DOES NOT CHECK WELLFORMEDNESS OF 'tag'

    For example:
    >> parse_tag('245COc') = ['245', 'C', 'O', 'c']
    >> parse_tag('245C_c') = ['245', 'C', '', 'c']
    >> parse_tag('245__c') = ['245', '', '', 'c']
    >> parse_tag('245__$$c') = ['245', '', '', 'c']
    >> parse_tag('245 $c') = ['245', '', '', 'c']
    >> parse_tag('245C_.c') = ['245', 'C', '', 'c']
    >> parse_tag('245.c') = ['245', '', '', 'c']
    >> parse_tag('245__') = ['245', '', '', '']
    >> parse_tag('245__%') = ['245', '', '', '%']
    >> parse_tag('245$$%') = ['245', '', '', '%']
    >> parse_tag('2%5$$a') = ['2%5', '', '', 'a']

    @param tag a marc code as string
    @return a list [tag, indicator1, indicator2, subfield]
    """
    p_tag = ['', '', '', '']

    #Separators carry no information
    for separator in (" ", "$", "."):
        tag = tag.replace(separator, "")

    p_tag[0] = tag[0:3] #tag
    length = len(tag)

    if length == 4:
        p_tag[3] = tag[3] #subfield
    elif length in (5, 6):
        if tag[3] != "_":
            p_tag[1] = tag[3] #indicator 1
        if tag[4] != "_":
            p_tag[2] = tag[4] #indicator 2
        if length == 6:
            p_tag[3] = tag[5] #subfield

    return p_tag
characters #tag = tag.replace("_", "") #Remove _ characters p_tag[0] = tag[0:3] #tag if len(tag) == 4: p_tag[3] = tag[3] #subfield elif len(tag) == 5: ind1 = tag[3]#indicator 1 if ind1 != "_": p_tag[1] = ind1 ind2 = tag[4]#indicator 2 if ind2 != "_": p_tag[2] = ind2 elif len(tag) == 6: p_tag[3] = tag[5]#subfield ind1 = tag[3]#indicator 1 if ind1 != "_": p_tag[1] = ind1 ind2 = tag[4]#indicator 2 if ind2 != "_": p_tag[2] = ind2 return p_tag def get_format_template(filename, with_attributes=False): """ Returns the structured content of the given formate template. if 'with_attributes' is True, returns the name and description. Else 'attrs' is not returned as key in dictionary (it might, if it has already been loaded previously) {'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} } @param filename the filename of an format template @param with_attributes if True, fetch the attributes (names and description) for format' @return strucured content of format template """ #Get from cache whenever possible global format_templates_cache if not filename.endswith("."+format_template_extension): return None if format_templates_cache.has_key(filename): #If we must return with attributes and template exist in cache with attributes #then return cache. 
Else reload with attributes if with_attributes == True and format_templates_cache[filename].has_key('attrs'): return format_templates_cache[filename] format_template = {'code':""} try: path = "%s%s%s" % (templates_path, os.sep, filename) format_file = open(path) format_content = format_file.read() format_file.close() #Load format template code #Remove name and description code_and_description = pattern_format_template_name.sub("", format_content) code = pattern_format_template_desc.sub("", code_and_description) # Escape % chars in code (because we will use python formatting capabilities) format_template['code'] = code except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], file='error', ln=cdslang) register_errors(errors, 'error') #Save attributes if necessary if with_attributes: format_template['attrs'] = get_format_template_attrs(filename) #cache and return format_templates_cache[filename] = format_template return format_template def get_format_templates(with_attributes=False): """ Returns the list of all format templates if 'with_attributes' is True, returns the name and description. Else 'attrs' is not returned as key in each dictionary (it might, if it has already been loaded previously) [{'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} }, ... } @param with_attributes if True, fetch the attributes (names and description) for formats """ format_templates = {} files = os.listdir(templates_path) for filename in files: if filename.endswith("."+format_template_extension): format_templates[filename] = get_format_template(filename, with_attributes) return format_templates def get_format_template_attrs(filename): """ Returns the attributes of the format template with given filename The attributes are {'name', 'description'} Caution: the function does not check that path exists or that the format element is valid. 
@param the path to a format element """ attrs = {} attrs['name'] = "" attrs['description'] = "" try: template_file = open("%s%s%s"%(templates_path, os.sep, filename)) code = template_file.read() template_file.close() match = pattern_format_template_name.search(code) if match != None: attrs['name'] = match.group('name') else: attrs['name'] = filename match = pattern_format_template_desc.search(code) if match != None: attrs['description'] = match.group('desc').rstrip('.') except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], file='error', ln=cdslang) register_errors(errors, 'error') attrs['name'] = filename return attrs def get_format_element(element_name, verbose=0, with_built_in_params=False): """ Returns the format element structured content. Return None if element cannot be loaded (file not found, not readable or invalid) The returned structure is {'attrs': {some attributes in dict. See get_format_element_attrs_from_*} 'code': the_function_code, 'type':"field" or "python" depending if element is defined in file or table} @param element_name the name of the format element to load @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @param with_built_in_params if True, load the parameters built in all elements @return a dictionary with format element attributes """ #Get from cache whenever possible global format_elements_cache #Resolve filename and prepare 'name' as key for the cache filename = resolve_format_element_filename(element_name) if filename != None: name = filename.upper() else: name = element_name.upper() if format_elements_cache.has_key(name): element = format_elements_cache[name] if with_built_in_params == False or (with_built_in_params == True and element['attrs'].has_key('builtin_params') ): return element if filename == None: #element is maybe in tag table if 
def get_format_elements(with_built_in_params=False):
    """
    Returns the format elements as a dictionary structure.

    Elements declared in files have priority over elements declared in 'tag' table:
    they are loaded last and replace table elements with the same key.

    The returned object has this format:
    {element_name1: {'attrs': {'description':..., 'seealso':...
                               'params':[{'name':..., 'default':..., 'description':...}, ...]
                               'builtin_params':[{'name':..., 'default':..., 'description':...}, ...]
                              },
                     'code': code_of_the_element
                    },
     element_name2: {...},
     ...}

    For elements declared in files, the key is the filename in uppercase,
    with spaces replaced by underscores and without "BFE_" prefix and
    ".PY" extension. Only file elements that could be loaded are returned.

    @param with_built_in_params if True, load the parameters built in all elements
    @return a dict of format elements with name as key, and a dict as attributes
    """
    format_elements = {}

    #Elements of the 'tag' table
    mappings = bibformat_dblayer.get_all_name_tag_mappings()
    for name in mappings:
        key = name.upper().replace(" ", "_").strip()
        format_elements[key] = get_format_element(name, with_built_in_params=with_built_in_params)

    #Elements declared in files
    for filename in os.listdir(elements_path):
        filename_test = filename.upper().replace(" ", "_")
        #Compare the normalized name: the raw filename is usually lowercase
        #and would never be equal to "__INIT__.PY"
        if not filename_test.endswith(".PY") or filename_test == "__INIT__.PY":
            continue
        if filename_test.startswith("BFE_"):
            filename_test = filename_test[4:]
        element_name = filename_test[:-3]
        element = get_format_element(element_name, with_built_in_params=with_built_in_params)
        if element is not None:
            format_elements[element_name] = element

    return format_elements
def get_format_element_attrs_from_function(function, element_name, with_built_in_params=False):
    """
    Returns the attributes of the function given as parameter.

    It looks for the arguments of the function, their default values
    and the comments in the docstring.

    The attributes are {'name' : "name of element" #'element_name' in uppercase, spaces replaced by underscores
                        'description': "a string description of the element",
                        'seealso' : ["element_1.py", "element_2.py", ...] #a list of related elements
                        'params': [{'name':"param_name", #a list of parameters for this element (except 'bfo')
                                    'default':"default value",
                                    'description': "a description"}, ...],
                        'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind
                                            'default':"default value",
                                            'description': "a description"}, ...],
                       }
    'builtin_params' is only present if 'with_built_in_params' is True.

    @param function the formatting function of a format element
    @param element_name the name of the element
    @param with_built_in_params if True, load the parameters built in all elements
    @return a dict of attributes
    """
    attrs = {}
    attrs['description'] = ""
    attrs['name'] = element_name.replace(" ", "_").upper()
    attrs['seealso'] = []

    docstring = function.__doc__
    has_docstring = isinstance(docstring, str)

    if has_docstring:
        #The description is what precedes the first @param and @see
        description = docstring.split("@param")[0].split("@see")[0]
        attrs['description'] = description.strip().rstrip('.')

        #Look for @see in docstring
        see_match = pattern_format_element_seealso.search(docstring)
        if see_match is not None:
            for element in see_match.group('see').rstrip('.').split(","):
                attrs['seealso'].append(element.strip())

    #Look for parameters in function definition
    (args, varargs, varkw, defaults) = inspect.getargspec(function)

    #Defaults apply to the last arguments: reverse both lists so that
    #argument and default at the same position go together
    args.reverse()
    defaults_list = []
    if defaults is not None:
        defaults_list = list(defaults)
        defaults_list.reverse()

    params = {}
    for position, arg in enumerate(args):
        if arg == "bfo":
            #Don't keep this as parameter. It is hidden to users, and exists in all elements of this kind
            continue

        default = None
        if position < len(defaults_list):
            default = defaults_list[position]
        if default is None:
            #In case no check is made inside element, we prefer to print "" (nothing) than None in output
            default = ""

        params[arg] = {'name': arg,
                       'default': default,
                       'description': "(no description provided)"}

    if has_docstring:
        #Add the @param descriptions of the docstring to the known parameters
        for param_match in pattern_format_element_params.finditer(docstring):
            name = param_match.group('name')
            if name in params:
                params[name]['description'] = param_match.group('desc').rstrip('.')

    attrs['params'] = params.values()

    #Load built-in parameters if necessary
    if with_built_in_params == True:
        attrs['builtin_params'] = [
            {'name': "prefix",
             'default': "",
             'description': "A prefix printed only if the record has a value for this element"},
            {'name': "suffix",
             'default': "",
             'description': "A suffix printed only if the record has a value for this element"},
            {'name': "default",
             'default': "",
             'description': "A default value printed if the record has no value for this element"},
            ]

    return attrs
def get_format_element_attrs_from_table(element_name, with_built_in_params=False):
    """
    Returns the attributes of the format element with given name in 'tag' table.

    The attributes are {'name' : "name of element" #'element_name' in uppercase, spaces replaced by underscores
                        'description': "a string description of the element",
                        'seealso' : [] #a list of related elements. Always empty in this case
                        'params': [], #a list of parameters for this element. Always empty in this case
                        'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind
                                            'default':"default value",
                                            'description': "a description"}, ...],
                        'tags':["950.1", 203.a] #the list of tags printed by this element
                       }
    'builtin_params' is only present if 'with_built_in_params' is True.

    @param element_name an element name in database
    @param with_built_in_params if True, load the parameters built in all elements
    @return a dict of attributes
    """
    tags = bibformat_dblayer.get_tags_from_name(element_name)

    if len(tags) > 1:
        field_label = "fields"
    else:
        field_label = "field"

    attrs = {'description': "Prints %s %s of the record" % (field_label, ", ".join(tags)),
             'name': element_name.replace(" ", "_").upper(),
             'seealso': [],
             'params': [],
             'tags': tags}

    #Load built-in parameters if necessary
    if with_built_in_params == True:
        attrs['builtin_params'] = [
            {'name': "prefix",
             'default': "",
             'description': "A prefix printed only if the record has a value for this element"},
            {'name': "suffix",
             'default': "",
             'description': "A suffix printed only if the record has a value for this element"},
            {'name': "separator",
             'default': " ",
             'description': "A separator between elements of the field"},
            {'name': "nbMax",
             'default': "",
             'description': "The maximum number of values to print for this element. No limit if not specified"},
            {'name': "default",
             'default': "",
             'description': "A default value printed if the record has no value for this element"},
            ]

    return attrs
No limit if not specified" builtin_params.append(param_nbMax) #Add 'default' parameter param_default = {} param_default['name'] = "default" param_default['default'] = "" param_default['description'] = "A default value printed if the record has no value for this element" builtin_params.append(param_default) attrs['builtin_params'] = builtin_params return attrs def get_output_format(code, with_attributes=False, verbose=0): """ Returns the structured content of the given output format If 'with_attributes' is True, also returns the names and description of the output formats, else 'attrs' is not returned in dict (it might, if it has already been loaded previously). if output format corresponding to 'code' is not found return an empty structure. See get_output_format_attrs() to learn more on the attributes {'rules': [ {'field': "980__a", 'value': "PREPRINT", 'template': "filename_a.bft", }, {...} ], 'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1", 'content_type': "application/ms-excel" } 'default':"filename_b.bft" } @param code the code of an output_format @param with_attributes if True, fetch the attributes (names and description) for format @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return strucured content of output format """ output_format = {'rules':[], 'default':""} filename = resolve_output_format_filename(code, verbose) if filename == None: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN", code)], file='error', ln=cdslang) register_errors(errors, 'error') if with_attributes == True: #Create empty attrs if asked for attributes output_format['attrs'] = get_output_format_attrs(code, verbose) return output_format #Get from cache whenever possible global format_outputs_cache if format_outputs_cache.has_key(filename): #If was must 
def get_output_format_attrs(code, verbose=0):
    """
    Returns the attributes of an output format.

    The attributes contain 'code', which is the short identifier of the output format
    (to be given as parameter in format_record function to specify the output format),
    'description', a description of the output format, 'names', the localized names
    of the output format, and 'content_type'.

    The 'names' dict has keys 'generic', 'ln' (for long names) and 'sn' (for short names)
    in the empty structure returned when the output format cannot be resolved.
    Otherwise names, description and content type are read from the database layer.

    {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
     'description': "a description"
     'code': "fnm1",
     'content_type': "application/ms-excel"
    }

    @param code the short identifier of the format
    @param verbose the level of verbosity from 0 to 9 (0: silent,
                                                       5: errors,
                                                       7: errors and warnings,
                                                       9: errors and warnings, stop if error (debug mode))
    @return structured content of output format attributes
    """
    #Accept a filename as well as a code: drop the extension
    extension = "."+format_output_extension
    if code.endswith(extension):
        code = code[:-(len(format_output_extension) + 1)]

    attrs = {'names': {'generic':"", 'ln':{}, 'sn':{}},
             'description': '',
             'code': code.upper(),
             'content_type': ""}

    if resolve_output_format_filename(code, verbose) is None:
        return attrs

    attrs['names'] = bibformat_dblayer.get_output_format_names(code)
    attrs['description'] = bibformat_dblayer.get_output_format_description(code)
    attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code)

    return attrs
def get_output_formats(with_attributes=False):
    """
    Returns all output formats found in the output formats directory, as a
    dictionary with their filename as key.

    If 'with_attributes' is True, also returns the names and description of the output formats
    under key 'attrs' (see get_output_format_attrs()).

    {'filename_1.bfo': {'rules': [ {'field': "980__a",
                                    'value': "PREPRINT",
                                    'template': "filename_a.bft",
                                   },
                                   {...}
                                 ],
                        'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}}
                                  'description': "a description"
                                  'code': "fnm1"
                                 }
                        'default':"filename_b.bft"
                       },
     'filename_2.bfo': {...},
     ...
    }

    @param with_attributes if True, fetch the attributes (names and description) for formats
    @return a dict of output formats (see get_output_format) with filename as key
    """
    output_suffix = "."+format_output_extension
    output_formats = {}

    for filename in os.listdir(outputs_path):
        if not filename.endswith(output_suffix):
            continue
        #The code is the filename without its last extension (and without any dot)
        code = "".join(filename.split(".")[:-1])
        output_formats[filename] = get_output_format(code, with_attributes)

    return output_formats
def get_kb_mapping(kb, string, default=""):
    """
    Returns the value of 'string' in the knowledge base 'kb'.

    The value is read from the cache when available, else from the
    database layer (and then cached). If the value is None, returns
    'default'.

    @param kb a knowledge base name
    @param string a key in a knowledge base
    @param default a default value if 'string' is not in 'kb'
    @return the value corresponding to the given string in given kb
    """
    global kb_mappings_cache

    #Use the same key to read and to write the cache, otherwise keys
    #that are not plain strings are never found again
    cache_key = str(string)

    if kb in kb_mappings_cache:
        kb_cache = kb_mappings_cache[kb]
    else:
        #Precreate for caching this kb
        kb_cache = kb_mappings_cache[kb] = {}

    if cache_key in kb_cache:
        value = kb_cache[cache_key]
    else:
        value = bibformat_dblayer.get_kb_mapping_value(kb, string)
        kb_cache[cache_key] = value

    if value is None:
        return default
    return value

def resolve_format_element_filename(string):
    """
    Returns the filename of element corresponding to string.

    This is necessary since format templates code call elements by
    ignoring case. Spaces are also considered equal to underscores, the
    ".py" extension is optional in 'string', and the "BFE_" prefix may be
    present or missing on either side of the comparison.

    @param string a name for a format element
    @return the corresponding filename, with right case, or None if not found
    """
    name = string.replace(" ", "_").upper()
    if not string.endswith(".py"):
        name += ".PY"

    for filename in os.listdir(elements_path):
        test_filename = filename.replace(" ", "_").upper()
        if name in (test_filename, "BFE_" + test_filename) or \
               test_filename == "BFE_" + name:
            return filename

    #No element with that name found.
    #Do not log error, as it might be a normal execution case:
    #element can be in database
    return None
@param name a name for a format element @return the corresponding filename, with right case """ if not string.endswith(".py"): name = string.replace(" ", "_").upper() +".PY" else: name = string.replace(" ", "_").upper() files = os.listdir(elements_path) for filename in files: test_filename = filename.replace(" ", "_").upper() if test_filename == name or \ test_filename == "BFE_" + name or \ "BFE_" + test_filename == name: return filename #No element with that name found #Do not log error, as it might be a normal execution case: #element can be in database return None def resolve_output_format_filename(code, verbose=0): """ Returns the filename of output corresponding to code This is necessary since output formats names are not case sensitive but most file systems are. @param code the code for an output format @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return the corresponding filename, with right case, or None if not found """ code = re.sub(r"[^.0-9a-zA-Z]", "", code) #Remove non alphanumeric chars (except .) if not code.endswith("."+format_output_extension): code = re.sub(r"\W", "", code) code += "."+format_output_extension files = os.listdir(outputs_path) for filename in files: if filename.upper() == code.upper(): return filename #No output format with that name found errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME", code)], file='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if verbose >= 9: sys.exit(errors[0][1]) return None def get_fresh_format_template_filename(name): """ Returns a new filename and name for template with given name. 
Used when writing a new template to a file, so that the name has no space, is unique in template directory Returns (unique_filename, modified_name) @param a name for a format template @return the corresponding filename, and modified name if necessary """ #name = re.sub(r"\W", "", name) #Remove non alphanumeric chars name = name.replace(" ", "_") filename = name filename = re.sub(r"[^.0-9a-zA-Z]", "", filename) #Remove non alphanumeric chars (except .) path = templates_path + os.sep + filename + "." + format_template_extension index = 1 while os.path.exists(path): index += 1 filename = name + str(index) path = templates_path + os.sep + filename + "." + format_template_extension if index > 1: returned_name = (name + str(index)).replace("_", " ") else: returned_name = name.replace("_", " ") return (filename + "." + format_template_extension, returned_name) #filename.replace("_", " ")) def get_fresh_output_format_filename(code): """ Returns a new filename for output format with given code. Used when writing a new output format to a file, so that the code has no space, is unique in output format directory. The filename also need to be at most 6 chars long, as the convention is that filename == output format code (+ .extension) We return an uppercase code Returns (unique_filename, modified_code) @param code the code of an output format @return the corresponding filename, and modified code if necessary """ #code = re.sub(r"\W", "", code) #Remove non alphanumeric chars code = code.upper().replace(" ", "_") code = re.sub(r"[^.0-9a-zA-Z]", "", code) #Remove non alphanumeric chars (except .) if len(code) > 6: code = code[:6] filename = code path = outputs_path + os.sep + filename + "." + format_output_extension index = 2 while os.path.exists(path): filename = code + str(index) if len(filename) > 6: filename = code[:-(len(str(index)))]+str(index) index += 1 path = outputs_path + os.sep + filename + "." + format_output_extension #We should not try more than 99999... 
Well I don't see how we could get there.. Sanity check. if index >= 99999: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED", code)], file='error', ln=cdslang) register_errors(errors, 'error') sys.exit("Output format cannot be named as %s"%code) return (filename + "." + format_output_extension, filename) def clear_caches(): """ Clear the caches (Output Format, Format Templates and Format Elements) """ global format_templates_cache, format_elements_cache , format_outputs_cache, kb_mappings_cache format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} kb_mappings_cache = {} - -from bibformat_utils import get_xml -from invenio.search_engine import record_exists, get_fieldvalues - class BibFormatObject: """ An object that encapsulates a record and associated methods, and that is given as parameter to all format elements 'format' function. The object is made specifically for a given formatting, i.e. it includes for example the language for the formatting. The object provides basic accessors to the record. For full access, one can get the record with get_record() and then use BibRecord methods on the returned object. """ #The record record = None #The language in which the formatting has to be done lang = cdslang - #A string pattern describing the context in which the record has to be formatted. - #It represents the user request in web interface search - search_pattern = None + #A list of string describing the context in which the record has to be formatted. + #It represents the words of the user request in web interface search + search_pattern = [] #The id of the record recID = 0 #The user id of the person who will view the formatted page (if applicable) #This allows for example to print a "edit record" link for people #who have right to edit a record. 
uid = None - def __init__(self, recID, ln=cdslang, search_pattern=None, xml_record=None, uid=None): + def __init__(self, recID, ln=cdslang, search_pattern=[], xml_record=None, uid=None): """ Creates a new bibformat object, with given record. You can either specify an record ID to format, or give its xml representation. if 'xml_record' != None, use 'xml_record' instead of recID for the record. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. @param recID the id of a record @param ln the language in which the record has to be formatted - @param search_pattern the request used by the user in web interface + @param search_pattern list of string representing the request used by the user in web interface @param xml_record a xml string of the record to format @param uid the user id of the person who will view the formatted page """ if xml_record != None: #If record is given as parameter self.record = create_record(xml_record)[0] recID = record_get_field_value(self.record,"001") self.lang = wash_language(ln) self.search_pattern = search_pattern self.recID = recID self.uid = uid def get_record(self): """ Returns the record of this BibFormatObject instance @return the record structure as returned by BibRecord """ #Create record if necessary if self.record == None: - record = create_record(get_xml(self.recID, 'xm')) + record = create_record(record_get_xml(self.recID, 'xm')) self.record = record[0] return self.record def control_field(self, tag): """ Returns the value of control field given by tag in record @param record the record to retrieve values from @param tag the marc code of a field @return value of field tag in record """ if self.get_record() == None: #Case where BibRecord could not parse object return '' p_tag = parse_tag(tag) return record_get_field_value(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) def field(self, tag): """ Returns the value of the field corresponding to tag in the current record. 
if the value does not exist, return empty string @param record the record to retrieve values from @param tag the marc code of a field @return value of field tag in record """ list_of_fields = self.fields(tag) if len(list_of_fields) > 0: return list_of_fields[0] else: return "" def fields(self, tag): """ Returns the list of values corresonding to "tag". If tag has an undefined subcode (such as 999C5), the function returns a list of dictionaries, whoose keys are the subcodes and the values are the values of tag.subcode. If the tag has a subcode, simply returns list of values corresponding to tag. @param record the record to retrieve values from @param tag the marc code of a field @return values of field tag in record """ if self.get_record() == None: #Case where BibRecord could not parse object return [] p_tag = parse_tag(tag) if p_tag[3] != "": #Subcode has been defined. Simply returns list of values return record_get_field_values(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) else: #Subcode is undefined. Returns list of dicts. #However it might be the case of a control field. list_of_dicts = [] instances = record_get_field_instances(self.get_record(), p_tag[0], p_tag[1], p_tag[2]) for instance in instances: instance_dict = dict(instance[0]) list_of_dicts.append(instance_dict) return list_of_dicts def kb(self, kb, string, default=""): """ Returns the value of the "string" in the knowledge base "kb". If kb does not exist or string does not exist in kb, returns 'default' string or empty string if not specified. 
@param kb a knowledge base name @param string the string we want to translate @param default a default value returned if 'string' not found in 'kb' """ if string == None: return default val = get_kb_mapping(kb, string, default) if val == None: return default else: return val def bf_profile(): """ Runs a benchmark """ for i in range(50): - format_record(i, "HD", ln=cdslang, verbose=9, search_pattern=None) + format_record(i, "HD", ln=cdslang, verbose=9, search_pattern=[]) return if __name__ == "__main__": import profile import pstats bf_profile() profile.run('bf_profile()', "bibformat_profile") p = pstats.Stats("bibformat_profile") p.strip_dirs().sort_stats("cumulative").print_stats() diff --git a/modules/bibformat/lib/bibformat_engine_tests.py b/modules/bibformat/lib/bibformat_engine_tests.py index b309fca84..4eb17afda 100644 --- a/modules/bibformat/lib/bibformat_engine_tests.py +++ b/modules/bibformat/lib/bibformat_engine_tests.py @@ -1,682 +1,691 @@ # -*- coding: utf-8 -*- ## $Id$ ## ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
import unittest import os import re +import sys from invenio import bibformat_engine from invenio import bibformat_config from invenio import bibrecord +from invenio.config import tmpdir -outputs_path = "..%setc%soutput_formats" % (os.sep, os.sep) -templates_path = "..%setc%sformat_templates" % (os.sep, os.sep) -elements_path = "elements" -elements_import_path = "elements" +#outputs_path = "..%setc%soutput_formats" % (os.sep, os.sep) +#templates_path = "..%setc%sformat_templates" % (os.sep, os.sep) +#elements_path = "elements" +outputs_path = "%s" % (tmpdir) +templates_path = "%s" % (tmpdir) +elements_path = "%s%stests_bibformat_elements" % (tmpdir, os.sep) +elements_import_path = "tests_bibformat_elements" class FormatTemplateTest(unittest.TestCase): """ bibformat - tests on format templates""" def test_get_format_template(self): """bibformat - format template parsing and returned structure""" bibformat_engine.templates_path = templates_path #Test correct parsing and structure template_1 = bibformat_engine.get_format_template("Test1.bft", with_attributes=True) self.assert_(template_1 != None) self.assertEqual(template_1['code'], "test") self.assertEqual(template_1['attrs']['name'], "name_test") self.assertEqual(template_1['attrs']['description'], "desc_test") #Test correct parsing and structure of file without description or name - template_2 = bibformat_engine.get_format_template("Test 2.bft", with_attributes=True) + template_2 = bibformat_engine.get_format_template("Test_2.bft", with_attributes=True) self.assert_(template_2 != None) self.assertEqual(template_2['code'], "test") - self.assertEqual(template_2['attrs']['name'], "Test 2.bft") + self.assertEqual(template_2['attrs']['name'], "Test_2.bft") self.assertEqual(template_2['attrs']['description'], "") #Test correct parsing and structure of file without description or name unknown_template = bibformat_engine.get_format_template("test_no_template.test", with_attributes=True) self.assertEqual(unknown_template, 
None) def test_get_format_templates(self): """ bibformat - loading multiple format templates""" bibformat_engine.templates_path = templates_path templates = bibformat_engine.get_format_templates(with_attributes=True) #test correct loading self.assert_("Test1.bft" in templates.keys()) - self.assert_("Test 2.bft" in templates.keys()) + self.assert_("Test_2.bft" in templates.keys()) self.assert_("Test3.bft" in templates.keys()) self.assert_("Test_no_template.test" not in templates.keys()) #Test correct pasrsing and structure self.assertEqual(templates['Test1.bft']['code'], "test") self.assertEqual(templates['Test1.bft']['attrs']['name'], "name_test") self.assertEqual(templates['Test1.bft']['attrs']['description'], "desc_test") def test_get_format_template_attrs(self): """ bibformat - correct parsing of attributes in format template""" bibformat_engine.templates_path = templates_path attrs = bibformat_engine.get_format_template_attrs("Test1.bft") self.assertEqual(attrs['name'], "name_test") self.assertEqual(attrs['description'], "desc_test") def test_get_fresh_format_template_filename(self): """ bibformat - getting fresh filename for format template""" bibformat_engine.templates_path = templates_path filename_and_name_1 = bibformat_engine.get_fresh_format_template_filename("Test") self.assert_(len(filename_and_name_1) >= 2) self.assertEqual(filename_and_name_1[0], "Test.bft") filename_and_name_2 = bibformat_engine.get_fresh_format_template_filename("Test1") self.assert_(len(filename_and_name_2) >= 2) self.assert_(filename_and_name_2[0] != "Test1.bft") path = bibformat_engine.templates_path + os.sep + filename_and_name_2[0] self.assert_(not os.path.exists(path)) class FormatElementTest(unittest.TestCase): """ bibformat - tests on format templates""" + def setUp(self): + """bibformat - setting python path to test elements""" + sys.path.append('%s' % tmpdir) + def test_resolve_format_element_filename(self): """bibformat - resolving format elements filename """ 
bibformat_engine.elements_path = elements_path - #Test elements filename starting without bfe_, and with spaces in filename + #Test elements filename starting without bfe_, with underscore instead of space filenames = ["test 1", "test 1.py", "bfe_test 1", "bfe_test 1.py", "BFE_test 1", "BFE_TEST 1", "BFE_TEST 1.py", "BFE_TeST 1.py", "BFE_TeST 1", "BfE_TeST 1.py", "BfE_TeST 1","test_1", "test_1.py", "bfe_test_1", "bfe_test_1.py", "BFE_test_1", "BFE_TEST_1", "BFE_TEST_1.py", "BFE_Test_1.py", "BFE_TeST_1", "BfE_TeST_1.py", "BfE_TeST_1"] for i in range(len(filenames)-2): filename_1 = bibformat_engine.resolve_format_element_filename(filenames[i]) self.assert_(filename_1 != None) filename_2 = bibformat_engine.resolve_format_element_filename(filenames[i+1]) self.assertEqual(filename_1, filename_2) #Test elements filename starting with bfe_, and with underscores instead of spaces filenames = ["test 2", "test 2.py", "bfe_test 2", "bfe_test 2.py", "BFE_test 2", "BFE_TEST 2", "BFE_TEST 2.py", "BFE_TeST 2.py", "BFE_TeST 2", "BfE_TeST 2.py", "BfE_TeST 2","test_2", "test_2.py", "bfe_test_2", "bfe_test_2.py", "BFE_test_2", "BFE_TEST_2", "BFE_TEST_2.py", "BFE_TeST_2.py", "BFE_TeST_2", "BfE_TeST_2.py", "BfE_TeST_2"] for i in range(len(filenames)-2): filename_1 = bibformat_engine.resolve_format_element_filename(filenames[i]) self.assert_(filename_1 != None) filename_2 = bibformat_engine.resolve_format_element_filename(filenames[i+1]) self.assertEqual(filename_1, filename_2) #Test non existing element non_existing_element = bibformat_engine.resolve_format_element_filename("BFE_NON_EXISTING_ELEMENT") self.assertEqual(non_existing_element, None) def test_get_format_element(self): """bibformat - format elements parsing and returned structure""" bibformat_engine.elements_path = elements_path bibformat_engine.elements_import_path = elements_import_path #Test loading with different kind of names, for element with spaces in name, without bfe_ element_1 = 
bibformat_engine.get_format_element("test 1", with_built_in_params=True) self.assert_(element_1 != None) element_1_bis = bibformat_engine.get_format_element("bfe_tEst_1.py", with_built_in_params=True) self.assertEqual(element_1, element_1_bis) #Test loading with different kind of names, for element without spaces in name, wit bfe_ element_2 = bibformat_engine.get_format_element("test 2", with_built_in_params=True) self.assert_(element_2 != None) element_2_bis = bibformat_engine.get_format_element("bfe_tEst_2.py", with_built_in_params=True) self.assertEqual(element_2, element_2_bis) #Test loading incorrect elements element_3 = bibformat_engine.get_format_element("test 3", with_built_in_params=True) self.assertEqual(element_3, None) element_4 = bibformat_engine.get_format_element("test 4", with_built_in_params=True) self.assertEqual(element_4, None) unknown_element = bibformat_engine.get_format_element("TEST_NO_ELEMENT", with_built_in_params=True) self.assertEqual(unknown_element, None) #Test element without docstring element_5 = bibformat_engine.get_format_element("test_5", with_built_in_params=True) self.assert_(element_5 != None) self.assertEqual(element_5['attrs']['description'], '') self.assert_({'name':"param1", 'description':"(no description provided)", 'default':""} in element_5['attrs']['params'] ) self.assertEqual(element_5['attrs']['seealso'], []) #Test correct parsing: #Test type of element self.assertEqual(element_1['type'], "python") #Test name = element filename, with underscore instead of spaces, #without BFE_ and uppercase self.assertEqual(element_1['attrs']['name'], "TEST_1") #Test description parsing self.assertEqual(element_1['attrs']['description'], "Prints test") #Test @see parsing self.assertEqual(element_1['attrs']['seealso'], ["element2.py", "unknown_element.py"]) #Test @param parsing self.assert_({'name':"param1", 'description':"desc 1", 'default':""} in element_1['attrs']['params'] ) self.assert_({'name':"param2", 'description':"desc 2", 
'default':"default value"} in element_1['attrs']['params'] ) #Test non existing element non_existing_element = bibformat_engine.get_format_element("BFE_NON_EXISTING_ELEMENT") self.assertEqual(non_existing_element, None) def test_get_format_element_attrs_from_function(self): """ bibformat - correct parsing of attributes in 'format' docstring""" bibformat_engine.elements_path = elements_path bibformat_engine.elements_import_path = elements_import_path element_1 = bibformat_engine.get_format_element("test 1", with_built_in_params=True) function = element_1['code'] attrs = bibformat_engine.get_format_element_attrs_from_function(function, element_1['attrs']['name'], with_built_in_params=True) self.assertEqual(attrs['name'], "TEST_1") #Test description parsing self.assertEqual(attrs['description'], "Prints test") #Test @see parsing self.assertEqual(attrs['seealso'], ["element2.py", "unknown_element.py"]) def test_get_format_elements(self): """bibformat - multiple format elements parsing and returned structure""" bibformat_engine.elements_path = elements_path bibformat_engine.elements_import_path = elements_import_path elements = bibformat_engine.get_format_elements() self.assert_(isinstance(elements, dict)) self.assertEqual(elements['TEST_1']['attrs']['name'], "TEST_1") self.assertEqual(elements['TEST_2']['attrs']['name'], "TEST_2") self.assert_("TEST_3" not in elements.keys()) self.assert_("TEST_4" not in elements.keys()) class OutputFormatTest(unittest.TestCase): """ bibformat - tests on output formats""" def test_get_output_format(self): """ bibformat - output format parsing and returned structure """ bibformat_engine.outputs_path = outputs_path filename_1 = bibformat_engine.resolve_output_format_filename("test1") output_1 = bibformat_engine.get_output_format(filename_1, with_attributes=True) self.assertEqual(output_1['attrs']['names']['generic'], "") self.assert_(isinstance(output_1['attrs']['names']['ln'], dict)) 
self.assert_(isinstance(output_1['attrs']['names']['sn'], dict)) self.assertEqual(output_1['attrs']['code'], "TEST1") self.assert_(len(output_1['attrs']['code']) <= 6) self.assertEqual(len(output_1['rules']), 4) self.assertEqual(output_1['rules'][0]['field'], '980.a') self.assertEqual(output_1['rules'][0]['template'], 'Picture_HTML_detailed.bft') self.assertEqual(output_1['rules'][0]['value'], 'PICTURE ') self.assertEqual(output_1['rules'][1]['field'], '980.a') self.assertEqual(output_1['rules'][1]['template'], 'Article.bft') self.assertEqual(output_1['rules'][1]['value'], 'ARTICLE') self.assertEqual(output_1['rules'][2]['field'], '980__a') self.assertEqual(output_1['rules'][2]['template'], 'Thesis_detailed.bft') self.assertEqual(output_1['rules'][2]['value'], 'THESIS ') self.assertEqual(output_1['rules'][3]['field'], '980__a') self.assertEqual(output_1['rules'][3]['template'], 'Pub.bft') self.assertEqual(output_1['rules'][3]['value'], 'PUBLICATION ') filename_2 = bibformat_engine.resolve_output_format_filename("TEST2") output_2 = bibformat_engine.get_output_format(filename_2, with_attributes=True) self.assertEqual(output_2['attrs']['names']['generic'], "") self.assert_(isinstance(output_2['attrs']['names']['ln'], dict)) self.assert_(isinstance(output_2['attrs']['names']['sn'], dict)) self.assertEqual(output_2['attrs']['code'], "TEST2") self.assert_(len(output_2['attrs']['code']) <= 6) self.assertEqual(output_2['rules'], []) unknown_output = bibformat_engine.get_output_format("unknow", with_attributes=True) self.assertEqual(unknown_output, {'rules':[], 'default':"", 'attrs':{'names':{'generic':"", 'ln':{}, 'sn':{}}, 'description':'', 'code':"UNKNOW", 'content_type':""}}) def test_get_output_formats(self): """ bibformat - loading multiple output formats """ bibformat_engine.outputs_path = outputs_path outputs = bibformat_engine.get_output_formats(with_attributes=True) self.assert_(isinstance(outputs, dict)) self.assert_("TEST1.bfo" in outputs.keys()) 
self.assert_("TEST2.bfo" in outputs.keys()) self.assert_("unknow.bfo" not in outputs.keys()) #Test correct parsing output_1 = outputs["TEST1.bfo"] self.assertEqual(output_1['attrs']['names']['generic'], "") self.assert_(isinstance(output_1['attrs']['names']['ln'], dict)) self.assert_(isinstance(output_1['attrs']['names']['sn'], dict)) self.assertEqual(output_1['attrs']['code'], "TEST1") self.assert_(len(output_1['attrs']['code']) <= 6) def test_get_output_format_attrs(self): """ bibformat - correct parsing of attributes in output format""" bibformat_engine.outputs_path = outputs_path attrs= bibformat_engine.get_output_format_attrs("TEST1") self.assertEqual(attrs['names']['generic'], "") self.assert_(isinstance(attrs['names']['ln'], dict)) self.assert_(isinstance(attrs['names']['sn'], dict)) self.assertEqual(attrs['code'], "TEST1") self.assert_(len(attrs['code']) <= 6) def test_resolve_output_format(self): """ bibformat - resolving output format filename""" bibformat_engine.outputs_path = outputs_path filenames = ["test1", "test1.bfo", "TEST1", "TeST1", "TEST1.bfo", "test1"] for i in range(len(filenames)-2): filename_1 = bibformat_engine.resolve_output_format_filename(filenames[i]) self.assert_(filename_1 != None) filename_2 = bibformat_engine.resolve_output_format_filename(filenames[i+1]) self.assertEqual(filename_1, filename_2) def test_get_fresh_output_format_filename(self): """ bibformat - getting fresh filename for output format""" bibformat_engine.outputs_path = outputs_path filename_and_name_1 = bibformat_engine.get_fresh_output_format_filename("test") self.assert_(len(filename_and_name_1) >= 2) self.assertEqual(filename_and_name_1[0], "TEST.bfo") filename_and_name_1_bis = bibformat_engine.get_fresh_output_format_filename("") self.assert_(len(filename_and_name_1_bis) >= 2) self.assertEqual(filename_and_name_1_bis[0], "TEST.bfo") filename_and_name_2 = bibformat_engine.get_fresh_output_format_filename("test1") self.assert_(len(filename_and_name_2) >= 2) 
self.assert_(filename_and_name_2[0] != "TEST1.bfo") path = bibformat_engine.outputs_path + os.sep + filename_and_name_2[0] self.assert_(not os.path.exists(path)) filename_and_name_3 = bibformat_engine.get_fresh_output_format_filename("test1testlong") self.assert_(len(filename_and_name_3) >= 2) self.assert_(filename_and_name_3[0] != "TEST1TESTLONG.bft") self.assert_(len(filename_and_name_3[0]) <= 6 + 1 + len(bibformat_config.format_output_extension)) path = bibformat_engine.outputs_path + os.sep + filename_and_name_3[0] self.assert_(not os.path.exists(path)) class PatternTest(unittest.TestCase): """ bibformat - tests on re patterns""" def test_pattern_lang(self): """ bibformat - correctness of pattern 'pattern_lang'""" text = '''

Here is my test text

Some wordsQuelques motsEinige Wörter garbage Here ends the middle of my test text EnglishFrançaisDeutsch Here ends my test text

''' result = bibformat_engine.pattern_lang.search(text) self.assertEqual(result.group("langs"), "Some wordsQuelques motsEinige Wörter garbage ") text = '''

Here is my test text

''' result = bibformat_engine.pattern_lang.search(text) self.assertEqual(result.group("langs"), "Some wordsQuelques motsEinige Wörter garbage ") def test_ln_pattern(self): """ bibformat - correctness of pattern 'ln_pattern'""" text = "Some wordsQuelques motsEinige Wörter garbage " result = bibformat_engine.ln_pattern.search(text) self.assertEqual(result.group(1), "en") self.assertEqual(result.group(2), "Some words") def test_pattern_format_template_name(self): """ bibformat - correctness of pattern 'pattern_format_template_name'""" text = ''' garbage a name a description on 2 lines

the content of the template

content ''' result = bibformat_engine.pattern_format_template_name.search(text) self.assertEqual(result.group('name'), "a name") def test_pattern_format_template_desc(self): """ bibformat - correctness of pattern 'pattern_format_template_desc'""" text = ''' garbage a name a description on 2 lines

the content of the template

content ''' result = bibformat_engine.pattern_format_template_desc.search(text) self.assertEqual(result.group('desc'), '''a description on 2 lines ''') def test_pattern_tag(self): """ bibformat - correctness of pattern 'pattern_tag'""" text = ''' garbage but part of content a name a description on 2 lines

the content of the template

my content is so nice! ''' result = bibformat_engine.pattern_tag.search(text) self.assertEqual(result.group('function_name'), "tiTLE") self.assertEqual(result.group('params'), 'param1="value1" param2=""') def test_pattern_tag(self): """ bibformat - correctness of pattern 'pattern_tag'""" text = ''' garbage but part of content a name a description on 2 lines

the content of the template

my content is so nice! ''' result = bibformat_engine.pattern_tag.search(text) self.assertEqual(result.group('function_name'), "tiTLE") self.assertEqual(result.group('params').strip(), '''param1="value1" param2=""''') def test_pattern_function_params(self): """ bibformat - correctness of pattern 'test_pattern_function_params'""" text = ''' param1="" param2="value2" param3="value3" garbage ''' names = ["param1", "param2", "param3"] values = ["", "value2", "value3"] results = bibformat_engine.pattern_format_element_params.finditer(text) #TODO param_i = 0 for match in results: self.assertEqual(match.group('param'), names[param_i]) self.assertEqual(match.group('value'), values [param_i]) param_i += 1 def test_pattern_format_element_params(self): """ bibformat - correctness of pattern 'pattern_format_element_params'""" text = ''' a description for my element some text @param param1 desc1 @param param2 desc2 @see seethis, seethat ''' names = ["param1", "param2"] descriptions = ["desc1", "desc2"] results = bibformat_engine.pattern_format_element_params.finditer(text) #TODO param_i = 0 for match in results: self.assertEqual(match.group('name'), names[param_i]) self.assertEqual(match.group('desc'), descriptions[param_i]) param_i += 1 def test_pattern_format_element_seealso(self): """ bibformat - correctness of pattern 'pattern_format_element_seealso' """ text = ''' a description for my element some text @param param1 desc1 @param param2 desc2 @see seethis, seethat ''' result = bibformat_engine.pattern_format_element_seealso.search(text) self.assertEqual(result.group('see').strip(), 'seethis, seethat') class MiscTest(unittest.TestCase): """ bibformat - tests on various functions""" def test_parse_tag(self): """ bibformat - result of parsing tags""" tags_and_parsed_tags = ['245COc', ['245', 'C', 'O', 'c'], '245C_c', ['245', 'C', '' , 'c'], '245__c', ['245', '' , '' , 'c'], '245__$$c', ['245', '' , '' , 'c'], '245__$c', ['245', '' , '' , 'c'], '245 $c', ['245', '' , '' , 'c'], 
'245 $$c', ['245', '' , '' , 'c'], '245__.c', ['245', '' , '' , 'c'], '245 .c', ['245', '' , '' , 'c'], '245C_$c', ['245', 'C', '' , 'c'], '245CO$$c', ['245', 'C', 'O', 'c'], '245CO.c', ['245', 'C', 'O', 'c'], '245$c', ['245', '' , '' , 'c'], '245.c', ['245', '' , '' , 'c'], '245$$c', ['245', '' , '' , 'c'], '245__%', ['245', '' , '' , '%'], '245__$$%', ['245', '' , '' , '%'], '245__$%', ['245', '' , '' , '%'], '245 $%', ['245', '' , '' , '%'], '245 $$%', ['245', '' , '' , '%'], '245$%', ['245', '' , '' , '%'], '245.%', ['245', '' , '' , '%'], '245_O.%', ['245', '' , 'O', '%'], '245.%', ['245', '' , '' , '%'], '245$$%', ['245', '' , '' , '%'], '2%5$$a', ['2%5', '' , '' , 'a'], '2%%%%a', ['2%%', '%', '%', 'a'], '2%%__a', ['2%%', '' , '' , 'a'], '2%%a', ['2%%', '' , '' , 'a']] for i in range(0, len(tags_and_parsed_tags), 2): parsed_tag = bibformat_engine.parse_tag(tags_and_parsed_tags[i]) self.assertEqual(parsed_tag, tags_and_parsed_tags[i+1]) class FormatTest(unittest.TestCase): """ bibformat - generic tests on function that do the formatting. 
Main functions""" def setUp(self): """ bibformat - prepare BibRecord objects""" self.xml_text_1 = ''' 33 thesis Doe1, John Doe2, John editor On the foo and bar1 On the foo and bar2 ''' #rec_1 = bibrecord.create_record(self.xml_text_1) self.bfo_1 = bibformat_engine.BibFormatObject(recID=None, ln='fr', xml_record=self.xml_text_1) self.xml_text_2 = ''' 33 thesis Doe1, John Doe2, John editor On the foo and bar1 On the foo and bar2 ''' #self.rec_2 = bibrecord.create_record(xml_text_2) self.bfo_2 = bibformat_engine.BibFormatObject(recID=None, ln='fr', xml_record=self.xml_text_2) self.xml_text_3 = ''' 33 eng Doe1, John Doe2, John editor On the foo and bar1 On the foo and bar2 article ''' #self.rec_3 = bibrecord.create_record(xml_text_3) self.bfo_3 = bibformat_engine.BibFormatObject(recID=None, ln='fr', xml_record=self.xml_text_3) def test_decide_format_template(self): """ bibformat - choice made by function decide_format_template""" bibformat_engine.outputs_path = outputs_path result = bibformat_engine.decide_format_template(self.bfo_1, "test1") self.assertEqual(result, "Thesis_detailed.bft") result = bibformat_engine.decide_format_template(self.bfo_3, "test3") self.assertEqual(result, "Test3.bft") #Only default matches result = bibformat_engine.decide_format_template(self.bfo_2, "test1") self.assertEqual(result, "Default_HTML_detailed.bft") #No match at all for record result = bibformat_engine.decide_format_template(self.bfo_2, "test2") self.assertEqual(result, None) #Non existing output format result = bibformat_engine.decide_format_template(self.bfo_2, "UNKNOW") self.assertEqual(result, None) def test_format_record(self): """ bibformat - correct formatting""" bibformat_engine.outputs_path = outputs_path bibformat_engine.elements_path = elements_path bibformat_engine.elements_import_path = elements_import_path bibformat_engine.templates_path = templates_path - #use output format that has no match - result = bibformat_engine.format_record(recID=None, of="test2", 
xml_record=self.xml_text_2) - self.assertEqual(result.replace("\n", ""),"") - + #use output format that has no match TEST DISABLED DURING MIGRATION + #result = bibformat_engine.format_record(recID=None, of="test2", xml_record=self.xml_text_2) + #self.assertEqual(result.replace("\n", ""),"") + #use output format that link to unknown template result = bibformat_engine.format_record(recID=None, of="test3", xml_record=self.xml_text_2) self.assertEqual(result.replace("\n", ""),"") - #Unknown output format - result = bibformat_engine.format_record(recID=None, of="unkno", xml_record=self.xml_text_3) - self.assertEqual(result.replace("\n", ""),"") + #Unknown output format TEST DISABLED DURING MIGRATION + #result = bibformat_engine.format_record(recID=None, of="unkno", xml_record=self.xml_text_3) + #self.assertEqual(result.replace("\n", ""),"") #Default formatting result = bibformat_engine.format_record(recID=None, ln='fr', of="test3", xml_record=self.xml_text_3) self.assertEqual(result,'''

hi

this is my template\ntesttfrgarbage\ntest me!oka default valueeditor''') def test_format_with_format_template(self): """ bibformat - correct formatting with given template""" bibformat_engine.elements_path = elements_path bibformat_engine.elements_import_path = elements_import_path bibformat_engine.templates_path = templates_path template = bibformat_engine.get_format_template("Test3.bft") result = bibformat_engine.format_with_format_template(format_template_filename = None, bfo=self.bfo_1, verbose=0, format_template_code=template['code']) self.assert_(isinstance(result, tuple)) self.assertEqual(result[0],'''

hi

this is my template\ntesttfrgarbage\ntest me!oka default valueeditor''') def create_test_suite(): """Return test suite for the bibformat module""" return unittest.TestSuite((unittest.makeSuite(FormatTemplateTest,'test'), unittest.makeSuite(OutputFormatTest,'test'), unittest.makeSuite(FormatElementTest,'test'), unittest.makeSuite(PatternTest,'test'), unittest.makeSuite(MiscTest,'test'), unittest.makeSuite(FormatTest,'test'))) if __name__ == '__main__': unittest.TextTestRunner(verbosity=2).run(create_test_suite()) diff --git a/modules/bibformat/lib/bibformat_migration_kit_assistant_lib.py b/modules/bibformat/lib/bibformat_migration_kit_assistant_lib.py index 53af85d28..d0ac8a265 100644 --- a/modules/bibformat/lib/bibformat_migration_kit_assistant_lib.py +++ b/modules/bibformat/lib/bibformat_migration_kit_assistant_lib.py @@ -1,185 +1,174 @@ # -*- coding: utf-8 -*- ## $Id$ ## Deal with Bibformat configuraion files. ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
__lastupdated__ = """$Date$""" import os from invenio.config import cdslang, weburl, etcdir from invenio.urlutils import wash_url_argument from invenio.messages import gettext_set_language, wash_language from invenio.errorlib import get_msgs_for_code_list import invenio.template migration_kit_templates = invenio.template.load('bibformat_migration_kit') status_filename = 'migration_status.txt' status_filepath = etcdir + os.sep +"bibformat" + os.sep + status_filename def getnavtrail(previous = '', ln=cdslang): """Get the navtrail""" previous = wash_url_argument(previous, 'str') ln = wash_language(ln) _ = gettext_set_language(ln) navtrail = """%s > %s """ % (weburl, _("Admin Area"), weburl, _("BibFormat Admin")) navtrail = navtrail + previous return navtrail def perform_request_migration_kit_status(ln=cdslang): """ Show the user migration status """ warnings = [] #Check that we can write in etc/bibformat and edit the migration status. #Else do not allow migration if not can_write_migration_status_file(): warnings.append(("WRN_BIBFORMAT_CANNOT_WRITE_MIGRATION_STATUS")) if not can_write_etc_bibformat_dir(): warnings.append(("WRN_BIBFORMAT_CANNOT_WRITE_IN_ETC_BIBFORMAT")) if len(warnings) > 0: warnings = get_msgs_for_code_list(warnings, 'warning', ln) warnings = [x[1] for x in warnings] # Get only message, not code return migration_kit_templates.tmpl_admin_cannot_migrate(warnings) else: status = read_status() steps = [] steps.append({'link':weburl+"/admin/bibformat/bibformat_migration_kit_assistant.py/migrate_kb", 'label':"Migrate knowledge bases", 'status':status['kbs']}) steps.append({'link':weburl+"/admin/bibformat/bibformat_migration_kit_assistant.py/migrate_behaviours",'label':"Migrate behaviours", 'status':status['behaviours']}) steps.append({'link':weburl+"/admin/bibformat/bibformat_migration_kit_assistant.py/migrate_formats",'label':"Migrate formats", 'status':status['formats']}) return migration_kit_templates.tmpl_admin_migration_status(ln, steps) def 
perform_request_migration_kit_knowledge_bases(ln=cdslang): """ Migrate and tell user """ status = bibformat_migration_kit.migrate_kbs() save_status("kbs", status) return migration_kit_templates.tmpl_admin_migrate_knowledge_bases(ln) def perform_request_migration_kit_behaviours(ln=cdslang): """ Migrate and tell user """ status = bibformat_migration_kit.migrate_behaviours() save_status("behaviours", status) return migration_kit_templates.tmpl_admin_migrate_behaviours(ln, status) def perform_request_migration_kit_formats(ln=cdslang): """ Display the different options and warnings to the user. Don't migrate yet """ return migration_kit_templates.tmpl_admin_migrate_formats(ln) def perform_request_migration_kit_formats_do(ln=cdslang): """ Migrate and tell user """ status = bibformat_migration_kit.migrate_formats() save_status("formats", status) return migration_kit_templates.tmpl_admin_migrate_formats_do(ln) def save_status(step, status="Not migrated"): """ Save the status of a step inside 'migration_status.txt' file """ text = "" old_value_replaced = False if os.path.exists(status_filepath): for line in open(status_filepath): #Try to replace previous value if line.startswith(step): text = text + step+"---"+status +"\n" old_value_replaced = True else: text = text +line if not old_value_replaced: #Else add value at the end text = text +"\n"+ step+"---"+status +"\n" else: text = step+"---"+status +"\n" file = open(status_filepath, 'w') file.write(text) file.close def read_status(): """ Read the status of the migration. 
Returns a dictionary with step name as key ('kbs', 'behaviours', 'formats') and status string as value """ - status = {'kbs':'Not Migrated', 'behaviours':'Not Migrated', 'formats':'Not Migrated', 'use old BibFormat':'YES'} + status = {'kbs':'Not Migrated', 'behaviours':'Not Migrated', 'formats':'Not Migrated'} try: if os.path.exists(status_filepath): for line in open(status_filepath): s_line = line.split("---") if len(line)>1: status[s_line[0]] = s_line[1].strip("\n") except: pass return status def can_write_migration_status_file(): """ Checks that we have write permission on file migration_status.txt in etc/bibformat directory. If file does not exist, return True if we have write permission in directory etc/bibformat directory to create this file. """ if os.path.exists(status_filepath): return os.access(status_filepath, os.W_OK) else: #check writability of etc/bibformat dir return can_write_etc_bibformat_dir() -def use_old_bibformat(): - """ - Returns True if the old BibFormat must be used. Else returns False. - """ - status = read_status() - - if status["use old BibFormat"] == "YES": - return True - else: - return False - from invenio import bibformat_migration_kit from invenio.bibformatadminlib import can_write_etc_bibformat_dir diff --git a/modules/bibformat/lib/bibformat_templates.py b/modules/bibformat/lib/bibformat_templates.py index 864962240..97546c502 100644 --- a/modules/bibformat/lib/bibformat_templates.py +++ b/modules/bibformat/lib/bibformat_templates.py @@ -1,2002 +1,2023 @@ # -*- coding: utf-8 -*- ## $Id$ ## Administration of Bibformat config files ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. 
## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """HTML Templates for BibFormat administration""" __lastupdated__ = """$Date$""" # non Invenio imports import cgi # Invenio imports from invenio.messages import gettext_set_language from invenio.textutils import indent_text from invenio.config import weburl, sweburl from invenio.messages import language_list_long class Template: """Templating class, refer to bibformat.py for examples of call""" - def tmpl_admin_index(self, use_old_bibformat, ln, warnings): + def tmpl_admin_index(self, ln, warnings): """ Returns the main BibFormat admin page. - The page offers for some times the choice between the old and new BibFormat. - This is the only page where the code needs to be cleaned - when the migration kit will be removed. #FIXME: remove when removing migration kit - @param ln language - @param use_old_bibformat if True use old BibFormat. If False, use new. @param warnings a list of warnings to display at top of page. None if no warning @return main BibFormat admin page """ _ = gettext_set_language(ln) # load the right message language - if use_old_bibformat: - old_bibformat_checked = "checked" - new_bibformat_checked = "" - else: - old_bibformat_checked = "" - new_bibformat_checked = "checked" - - out = ''' For the time being it is recommended to use - this BibFormat administration interface




-














-














- ''' % weburl + out = '' if warnings: out += '''
%(warnings)s
''' % {'warnings': '
'.join(warnings)} out += '''
Bibformat has changed! -

You will need to migrate your formats. You can read the documentation to learn how to write +

You will need to migrate your formats. You can read the documentation to learn how to write formats, or use the migration assistant.

However for some time the old Bibformat will still run along the new one, so that you can - transition smoothly. Choose which one to run:

-
-
- - - - -
-
+ transition smoothly.

- ''' % {'old_bibformat_checked':old_bibformat_checked, - 'new_bibformat_checked':new_bibformat_checked, - 'weburl':weburl} + ''' % {'weburl':weburl} out += '''

This is where you can edit the formatting style available for the collections. You need to login to enter.

- -
-
Manage Output Formats
-
Create, edit and delete output formats, the rules that define which format template must be used for a given record.
-
Manage Format Templates
Create, edit and delete format templates, which define how to format a record.
+
Manage Output Formats
+
Create, edit and delete output formats, the rules that define which format template must be used for a given record.
+
+
Format Elements Documentation
Documentation of the format elements to be used inside format templates.
Manage Knowledge Bases
Specify how an incomplete or non standard record has to be transformed into a nice standard text.
BibFormat Admin Guide
Everything you want to know about BibFormat administration

OLD BIBFORMAT ADMIN FOLLOWS:

The BibFormat admin interface enables you to specify how the bibliographic data is presented to the end user in the search interface and search results pages. For example, you may specify that titles should be printed in bold font, the abstract in small italic, etc. Moreover, the BibFormat is not only a simple bibliographic data output formatter, but also an automated link constructor. For example, from the information on journal name and pages, it may automatically create links to publisher's site based on some configuration rules.

Configuring BibFormat

By default, a simple HTML format based on the most common fields (title, author, abstract, keywords, fulltext link, etc) is defined. You certainly want to define your own output formats in case you have a specific metadata structure.

Here is a short guide of what you can configure:

Behaviours
Define one or more output BibFormat behaviours. These are then passed as parameters to the BibFormat modules while executing formatting.
Example: You can tell BibFormat that it has to enrich the incoming metadata file by the created format, or that it only has to print the format out.
Extraction Rules
Define how the metadata tags from input are mapped into internal BibFormat variable names. The variable names can afterwards be used in formatting and linking rules.
Example: You can tell that 100 $a field should be mapped into $100.a internal variable that you could use later.
Link Rules
Define rules for automated creation of URI links from mapped internal variables.
Example: You can tell a rule how to create a link to People database out of the $100.a internal variable representing author's name. (The $100.a variable was mapped in the previous step, see the Extraction Rules.)
File Formats
Define file format types based on file extensions. This will be used when proposing various fulltext services.
Example: You can tell that *.pdf files will be treated as PDF files.
User Defined Functions (UDFs)
Define your own functions that you can reuse when creating your own output formats. This enables you to do complex formatting without ever touching the BibFormat core code.
Example: You can define a function how to match and extract email addresses out of a text file.
Formats
Define the output formats, i.e. how to create the output out of internal BibFormat variables that were extracted in a previous step. This is the functionality you would want to configure most of the time. It may reuse formats, user defined functions, knowledge bases, etc.
Example: You can tell that authors should be printed in italic, that if there are more than 10 authors only the first three should be printed, etc.
Knowledge Bases (KBs)
Define one or more knowledge bases that enables you to transform various forms of input data values into the unique standard form on the output.
Example: You can tell that Phys Rev D and Physical Review D are both the same journal and that these names should be standardized to Phys Rev : D.
Execution Test
Enables you to test your formats on your sample data file. Useful when debugging newly created formats.

To learn more on BibFormat configuration, you can consult the BibFormat Admin Guide.

Running BibFormat

From the Web interface

Run Reformat Records tool. This tool permits you to update stored formats for bibliographic records.
It should normally be used after configuring BibFormat's Behaviours and Formats. When these are ready, you can choose to rebuild formats for selected collections or you can manually enter a search query and the web interface will accomplish all necessary formatting steps.
Example: You can request Photo collections to have their HTML brief formats rebuilt, or you can reformat all the records written by Ellis.

From the command-line interface

Consider having an XML MARC data file that is to be uploaded into the CDS Invenio. (For example, it might have been harvested from other sources and processed via BibConvert.) Having configured BibFormat and its default output type behaviour, you would then run this file through BibFormat as follows:

         $ bibformat < /tmp/sample.xml > /tmp/sample_with_fmt.xml
         
         
that would create default HTML formats and would "enrich" the input XML data file by this format. (You would then continue the upload procedure by calling successively BibUpload and BibWords.)

Now consider a different situation. You would like to add a new possible format, say "HTML portfolio" and "HTML captions" in order to nicely format multiple photographs in one page. Let us suppose that these two formats are called hp and hc and are already loaded in the collection_format table. (TODO: describe how this is done via WebAdmin.) You would then proceed as follows: firstly, you would prepare the corresponding output behaviours called HP and HC (TODO: note the uppercase!) that would not enrich the input file but that would produce an XML file with only 001 and FMT tags. (This is in order not to update the bibliographic information but the formats only.) You would also prepare corresponding formats at the same time. Secondly, you would launch the formatting as follows:

         $ bibformat otype=HP,HC < /tmp/sample.xml > /tmp/sample_fmts_only.xml
         
         
that should give you an XML file containing only 001 and FMT tags. Finally, you would upload the formats:
         $ bibupload < /tmp/sample_fmts_only.xml
         
         
and that's it. The new formats should now appear in WebSearch.
''' % {'weburl':weburl, 'ln':ln} return indent_text(out) def tmpl_admin_format_template_show_attributes(self, ln, name, description, filename, editable): """ Returns a page to change format template name and description @param ln language @param name the name of the format @param description the description of the format @param filename the filename of the template @param editable True if we let user edit, else False @return editor for 'format' """ _ = gettext_set_language(ln) # load the right message language out = "" out += ''' - +
%(menu)s
0. %(close_editor)s  1. %(template_editor)s  2. %(modify_template_attributes)s  3. %(check_dependencies)s 

''' % {'ln':ln, 'menu':_("Menu"), 'filename':filename, 'close_editor': _("Close Editor"), 'modify_template_attributes': _("Modify Template Attributes"), 'template_editor': _("Template Editor"), 'check_dependencies': _("Check Dependencies") } disabled = "" readonly = "" if not editable: disabled = 'disabled="disabled"' readonly = 'readonly="readonly"' out += '''
%(name)s attributes [?]
 
''' % {"name": name, "description": description, 'ln':ln, 'filename':filename, 'disabled':disabled, 'readonly':readonly, 'description_label': _("Description"), 'name_label': _("Name"), 'update_format_attributes': _("Update Format Attributes"), 'weburl':weburl } return out def tmpl_admin_format_template_show_dependencies(self, ln, name, filename, output_formats, format_elements, tags): """ Shows the dependencies (on elements) of the given format. @param name the name of the template @param filename the filename of the template @param format_elements the elements (and list of tags in each element) this template depends on @param output_formats the output format that depend on this template @param tags the tags that are called by format elements this template depends on. """ _ = gettext_set_language(ln) # load the right message language out = ''' - +
- +
%(menu)s
0. %(close_editor)s 0. %(close_editor)s  1. %(template_editor)s  2. %(modify_template_attributes)s  3. %(check_dependencies)s 
- +
Output Formats that use %(name)s Format Elements used by %(name)s* All Tags Called*
 
''' % {'ln':ln, 'filename':filename, 'menu': _("Menu"), 'close_editor': _("Close Editor"), 'modify_template_attributes': _("Modify Template Attributes"), 'template_editor': _("Template Editor"), 'check_dependencies': _("Check Dependencies"), 'name': name } #Print output formats if len(output_formats) == 0: out += '

No output format uses this format template.

' for output_format in output_formats: name = output_format['names']['generic'] filename = output_format['filename'] out += ''' %(name)s''' % {'filename':filename, 'name':name, 'ln':ln} if len(output_format['tags']) > 0: out += "("+", ".join(output_format['tags'])+")" out += "
" #Print format elements (and tags) out += '
 
' if len(format_elements) == 0: out += '

This format template uses no format element.

' for format_element in format_elements: name = format_element['name'] out += ''' %(name)s''' % {'name':"bfe_"+name.lower(), 'anchor':name.upper(), 'ln':ln} if len(format_element['tags']) > 0: out += "("+", ".join(format_element['tags'])+")" out += "
" #Print tags out += '
 
' if len(tags) == 0: out += '

This format template uses no tag.

' for tag in tags: out += '''%(tag)s
''' % { 'tag':tag} out += '''
*Note: Some tags linked with this format template might not be shown. Check manually. ''' return out def tmpl_admin_format_template_show(self, ln, name, description, code, filename, ln_for_preview, pattern_for_preview, editable, content_type_for_preview, content_types): """ Returns the editor for format templates. Edit 'format' @param ln language @param format the format to edit @param filename the filename of the template @param ln_for_preview the language for the preview (for bfo) @param pattern_for_preview the search pattern to be used for the preview (for bfo) @param editable True if we let user edit, else False @return editor for 'format' """ _ = gettext_set_language(ln) # load the right message language out = "" out += ''' - + + +
%(menu)s
0. %(close_editor)s  1. %(template_editor)s  2. %(modify_template_attributes)s  3. %(check_dependencies)s 
''' % {'ln': ln, 'filename': filename, 'menu': _("Menu"), 'label_show_doc': _("Show Documentation"), 'label_hide_doc': _("Hide Documentation"), 'close_editor': _("Close Editor"), 'modify_template_attributes': _("Modify Template Attributes"), 'template_editor': _("Template Editor"), - 'check_dependencies': _("Check Dependencies") + 'check_dependencies': _("Check Dependencies"), + 'weburl': sweburl or weburl } disabled = "" readonly = "" + toolbar = '' if not editable: disabled = 'disabled="disabled"' readonly = 'readonly="readonly"' - + toolbar = '' #First column: template code and preview out += ''' ''' % {'code':code, 'ln':ln, 'weburl':weburl, 'filename':filename, 'ln_for_preview':ln_for_preview, 'pattern_for_preview':pattern_for_preview } #Second column Print documentation out += '''
Format template code
+ %(toolbar)s +
Preview
   
Elements Documentation
''' % {'weburl':weburl, 'ln':ln} return out def tmpl_admin_format_template_show_short_doc(self, ln, format_elements): """ Prints the format element documentation in a condensed way to display inside format template editor. This page is different from others: it is displayed inside a