diff --git a/modules/bibformat/bin/bibreformat.in b/modules/bibformat/bin/bibreformat.in index 969364050..f43d01122 100644 --- a/modules/bibformat/bin/bibreformat.in +++ b/modules/bibformat/bin/bibreformat.in @@ -1,804 +1,804 @@ #!@PYTHON@ ## -*- mode: python; coding: utf-8; -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Call BibFormat engine and create HTML brief (and other) formats for bibliographic records. 
Upload formats via BibUpload.""" __revision__ = "$Id$" ## import interesting modules: try: from invenio.dbquery import run_sql, escape_string from invenio.config import \ weburl,\ tmpdir,\ bindir from invenio.search_engine import perform_request_search from invenio.search_engine import print_record, encode_for_xml from invenio.access_control_engine import acc_authorize_action from invenio.bibformat import format_record from invenio.bibformat_utils import encode_for_xml from invenio.bibformat_config import CFG_BIBFORMAT_USE_OLD_BIBFORMAT import getopt import getpass import marshal import signal import sys import os import re import time except ImportError, e: print "Error: %s" % e sys.exit(1) options = {} # global variable to hold task options sql_queries = [] # holds SQL queries to be executed cds_query = {} # holds CDS query parameters (fields, collection, pattern) process_format = 0 # flag, process records without created format process = 1 # flag, process records (unless count only) fmt = "hb" # default format to be processed sleeptime = "" # default sleeptime format_string = "%Y-%m-%d %H:%M:%S" # date/time format sched_time = time.strftime(format_string) # scheduled execution time in the date/time format ### run the bibreformat task bibsched scheduled ### def bibreformat_task(sql, sql_queries, cds_query, process_format): """ BibReformat main task """ global process, fmt t1 = os.times()[4] ### Query the database ### if process_format: print "Querying database for records with missing format ..." without_format = without_fmt(sql) recIDs = [] if cds_query['field'] != "" or \ cds_query['collection'] != "" or \ cds_query['pattern'] != "": print "Querying database for records with old format (CDS query)..." res = perform_request_search(req=None, of='id', c=cds_query['collection'], p=cds_query['pattern'], f=cds_query['field']) for item in res: recIDs.append(item) for sql_query in sql_queries: print "Querying database for records with old format (SQL query) ..." 
res = run_sql(sql_query) for item in res: recIDs.append(item[0]) ### list of corresponding record IDs was retrieved ### bibformat the records selected if process_format: print "Records to be processed: %d" % (len(recIDs) \ + len(without_format)) print "Out of it records without created format: %d" % len(without_format) else: print "Records to be processed: %d" % (len(recIDs)) ### Initialize main loop total_rec = 0 # Total number of records tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call ### Iterate over all records prepared in lists I (option) if process: if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this # when migration from php to # python bibformat is done (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_old(recIDs, weburl, fmt) else: (total_rec_1, tbibformat_1, tbibupload_1) = iterate_over_new(recIDs, fmt) total_rec += total_rec_1 tbibformat += tbibformat_1 tbibupload += tbibupload_1 ### Iterate over all records prepared in list II (no_format) if process_format and process: if CFG_BIBFORMAT_USE_OLD_BIBFORMAT: # FIXME: remove this # when migration from php to # python bibformat is done (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_old(without_format, weburl, fmt) else: (total_rec_2, tbibformat_2, tbibupload_2) = iterate_over_new(without_format, fmt) total_rec += total_rec_2 tbibformat += tbibformat_2 tbibupload += tbibupload_2 ### Final statistics t2 = os.times()[4] elapsed = t2 - t1 message = "total records processed: %d" % total_rec print message message = "total processing time: %2f sec" % elapsed print message message = "Time spent on external call (os.system):" print message message = " bibformat: %2f sec" % tbibformat print message message = " bibupload: %2f sec" % tbibupload print message ### Result set operations ### def lhdiff(l1, l2): "Does list difference via intermediate hash." 
d = {} ld = [] for e in l2: d[e] = 1 for e in l1: if not d.has_key(e): ld.append(e) return ld ### Result set operations ### def ldiff(l1, l2): "Returns l1 - l2." ld = [] for e in l1: if not e in l2: ld.append(e) return ld ### Identify recIDs of records with missing format ### def without_fmt(sql): "List of record IDs to be reformated, not having the specified format yet" format2 = [] all_rec_ids = [] q1 = sql['q1'] q2 = sql['q2'] ## get complete recID list all_rec_ids = run_sql(q1) ## get complete recID list of formatted records format1 = run_sql(q2) for item in format1: format2.append(item[0]) all_rec_ids = map(lambda x: x[0], all_rec_ids) return lhdiff(all_rec_ids, format2) ### Bibreformat all selected records (using new python bibformat) ### (see iterate_over_old further down) def iterate_over_new(list, fmt): - "Iterate odver list of IDs" + "Iterate over list of IDs" global options, total_rec n_it_rec = 0 # Number of records for current iteration n_it_max = 10000 # Number of max records in one iteration total_rec = 0 # Number of formatted records formatted_records = '' # (string-)List of formatted record of an iteration tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call for recID in list: total_rec += 1 n_it_rec += 1 if options["verbose"] >= 9: message = "Processing record %d with format %s (New BibFormat)" % (recID, fmt) print message ### bibformat external call ### t1 = os.times()[4] formatted_record = format_record(recID, fmt, on_the_fly=True) t2 = os.times()[4] tbibformat = tbibformat + (t2 - t1) # Encapsulate record in xml tags that bibupload understands prologue = ''' %s %s ''' % (recID, fmt) epilogue = ''' ''' formatted_records += prologue + encode_for_xml(formatted_record) + epilogue # every n_it_max record, upload all formatted records. 
# also upload if recID is last one if n_it_rec > n_it_max or total_rec == len(list): #Save formatted records to disk for bibupload finalfilename = "%s/rec_fmt_%s.xml" % (tmpdir, time.strftime('%Y%m%d_%H%M%S')) filehandle = open(finalfilename, "w") filehandle.write("" + \ formatted_records + \ "") filehandle.close() ### bibupload external call ### t1 = os.times()[4] if options["verbose"] >= 9: message = "START bibupload external call" print message command = "%s/bibupload -f %s" % (bindir, finalfilename) os.system(command) t2 = os.times()[4] tbibupload = tbibupload + (t2 - t1) if options["verbose"] >= 9: message = "END bibupload external call (time elapsed:%2f)" % (t2-t1) print message #Reset iteration state n_it_rec = 0 xml_content = '' return (total_rec, tbibformat, tbibupload) def iterate_over_old(list, weburl, fmt): - "Iterate odver list of IDs" + "Iterate over list of IDs" global options n_rec = 0 n_max = 10000 xml_content = '' # hold the contents tbibformat = 0 # time taken up by external call tbibupload = 0 # time taken up by external call total_rec = 0 # Number of formatted records for record in list: n_rec = n_rec + 1 total_rec = total_rec + 1 if options["verbose"] >= 9: message = "Processing record: %d" % (record) print message query = "id=%d&of=xm" % (record) count = 0 contents = print_record(record, 'xm') while (contents == "") and (count < 10): contents = print_record(record, 'xm') count = count + 1 time.sleep(10) if count == 10: sys.stderr.write("Failed to download %s from %s after 10 attempts... 
terminating" % (query, weburl)) sys.exit(0) xml_content = xml_content + contents if xml_content: if n_rec >= n_max: finalfilename = "%s/rec_fmt_%s.xml" % (tmpdir, time.strftime('%Y%m%d_%H%M%S')) filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] if options["verbose"] >= 9: message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (bindir, fmt.upper(), tmpdir, finalfilename, tmpdir) os.system(command) t22 = os.times()[4] if options["verbose"] >= 9: message = "END bibformat external call (time elapsed:%2f)" % (t22-t11) print message tbibformat = tbibformat + (t22 - t11) ### bibupload external call ### t11 = os.times()[4] if options["verbose"] >= 9: message = "START bibupload external call" print message command = "%s/bibupload -f %s" % (bindir, finalfilename) os.system(command) t22 = os.times()[4] if options["verbose"] >= 9: message = "END bibupload external call (time elapsed:%2f)" % (t22-t11) print message tbibupload = tbibupload + (t22- t11) n_rec = 0 xml_content = '' ### Process the last re-formated chunk ### if n_rec > 0: if options["verbose"] >= 9: print "Processing last record set (%d)" % n_rec finalfilename = "%s/rec_fmt_%s.xml" % (tmpdir, time.strftime('%Y%m%d_%H%M%S')) filename = "%s/bibreformat.xml" % tmpdir filehandle = open(filename ,"w") filehandle.write(xml_content) filehandle.close() ### bibformat external call ### t11 = os.times()[4] if options["verbose"] >= 9: message = "START bibformat external call" print message command = "%s/bibformat otype='%s' < %s/bibreformat.xml > %s 2> %s/bibreformat.err" % (bindir, fmt.upper(), tmpdir, finalfilename, tmpdir) os.system(command) t22 = os.times()[4] if options["verbose"] >= 9: message = "END bibformat external call (time elapsed:%2f)" % (t22 - t11) print message tbibformat = tbibformat + (t22 - t11) ### 
bibupload external call ### t11 = os.times()[4] if options["verbose"] >= 9: message = "START bibupload external call" print message command = "%s/bibupload -f %s" % (bindir, finalfilename) os.system(command) t22 = os.times()[4] if options["verbose"] >= 9: message = "END bibupload external call (time elapsed:%2f)" % (t22 - t11) print message tbibupload = tbibupload + (t22 - t11) return (total_rec, tbibformat, tbibupload) ### Bibshed compatibility procedures ### def get_date(var, format_string = "%Y-%m-%d %H:%M:%S"): """Returns a date string according to the format string. It can handle normal date strings and shifts with respect to now.""" date = time.time() shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])") factors = {"d":24*3600, "h":3600, "m":60, "s":1} m = shift_re.match(var) if m: sign = m.groups()[0] == "-" and -1 or 1 factor = factors[m.groups()[2]] value = float(m.groups()[1]) print value date = time.localtime(date + sign * factor * value) date = time.strftime(format_string, date) else: date = time.strptime(var, format_string) date = time.strftime(format_string, date) return date def write_message(msg, stream=sys.stdout): """Prints message and flush output stream (may be sys.stdout or sys.stderr). Useful for debugging stuff.""" if stream == sys.stdout or stream == sys.stderr: stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime())) stream.write("%s\n" % msg) stream.flush() else: sys.stderr.write("Unknown stream %s. 
[must be sys.stdout or sys.stderr]\n" % stream) def task_sig_sleep(sig, frame): """Signal handler for the 'sleep' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_sleep(), got signal %s frame %s" % (sig, frame)) write_message("sleeping...") task_update_status("SLEEPING") signal.pause() # wait for wake-up signal def task_sig_wakeup(sig, frame): """Signal handler for the 'wakeup' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_wakeup(), got signal %s frame %s" % (sig, frame)) write_message("continuing...") task_update_status("CONTINUING") def task_sig_stop(sig, frame): """Signal handler for the 'stop' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_stop(), got signal %s frame %s" % (sig, frame)) write_message("stopping...") task_update_status("STOPPING") write_message("flushing cache or whatever...") time.sleep(3) write_message("closing tables or whatever...") time.sleep(1) write_message("stopped") task_update_status("STOPPED") sys.exit(0) def task_sig_suicide(sig, frame): """Signal handler for the 'suicide' signal sent by BibSched.""" if options["verbose"] >= 9: write_message("task_sig_suicide(), got signal %s frame %s" % (sig, frame)) write_message("suiciding myself now...") task_update_status("SUICIDING") write_message("suicided") task_update_status("SUICIDED") sys.exit(0) def task_sig_unknown(sig, frame): """Signal handler for the other unknown signals sent by shell or user.""" # do nothing for unknown signals: write_message("unknown signal %d (frame %s) ignored" % (sig, frame)) def authenticate(user, header="BibReformat Task Submission", action="runbibformat"): """Authenticate the user against the user database. Check for its password, if it exists. Check for action access rights. Return user name upon authorization success, do system exit upon authorization failure. 
""" print header print "=" * len(header) if user == "": print >> sys.stdout, "\rUsername: ", user = sys.stdin.readline().lower().strip() else: print >> sys.stdout, "\rUsername:", user ## first check user pw: res = run_sql("select id,password from user where email=%s", (user,), 1) + \ run_sql("select id,password from user where nickname=%s", (user,), 1) if not res: print "Sorry, %s does not exist." % user sys.exit(1) else: (uid_db, password_db) = res[0] if password_db: password_entered = getpass.getpass() if password_db == password_entered: pass else: print "Sorry, wrong credentials for %s." % user sys.exit(1) ## secondly check authorization for the action: (auth_code, auth_message) = acc_authorize_action(uid_db, action) if auth_code != 0: print auth_message sys.exit(1) return user def task_submit(): """Submits task to the BibSched task queue. This is what people will be invoking via command line.""" global options, sched_time, sleeptime ## sanity check: remove eventual "task" option: if options.has_key("task"): del options["task"] ## authenticate user: user = authenticate(options.get("user", "")) ## submit task: task_id = run_sql("""INSERT INTO schTASK (id,proc,user,status,arguments,sleeptime,runtime) VALUES (NULL,'bibreformat',%s,'WAITING',%s,%s,%s)""", (user, marshal.dumps(options), sleeptime, escape_string(sched_time))) ## update task number: options["task"] = task_id run_sql("""UPDATE schTASK SET arguments=%s WHERE id=%s""", (marshal.dumps(options), task_id)) write_message("Task #%d submitted." 
% task_id) return task_id def task_update_progress(msg): """Updates progress information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET progress=%s where id=%s", (msg, options["task"])) def task_update_status(val): """Updates status information in the BibSched task table.""" global options return run_sql("UPDATE schTASK SET status=%s where id=%s", (val, options["task"])) def task_read_status(task_id): """Read status information in the BibSched task table.""" res = run_sql("SELECT status FROM schTASK where id=%s", (task_id,), 1) try: out = res[0][0] except: out = 'UNKNOWN' return out def task_get_options(id): """Returns options for the task 'id' read from the BibSched task queue table.""" out = {} res = run_sql("SELECT arguments FROM schTASK WHERE id=%s AND proc='bibreformat'", (id,)) try: out = marshal.loads(res[0][0]) except: write_message("Error: BibReformat task %d does not seem to exist." % id) sys.exit(1) return out def task_run(task_id, process_format): """Runs the task by fetching arguments from the BibSched task queue. This is what BibSched will be invoking via daemon call.""" global options, process, fmt, sched_time options = task_get_options(task_id) # get options from BibSched task table ## check task id: if not options.has_key("task"): write_message("Error: The task #%d does not seem to be a BibReformat task." 
% task_id) return ## initialize parameters if options.has_key("format"): fmt = options["format"] else: fmt = "hb" sql = { "all" : "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt, "last": "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format='%s' and bf.last_updated < br.modification_date" % fmt, "q1" : "select br.id from bibrec as br", "q2" : "select br.id from bibrec as br, bibfmt as bf where bf.id_bibrec=br.id and bf.format ='%s'" % fmt } if options.has_key("all"): sql_queries.append(sql['all']) if options.has_key("without"): process_format = 1 if options.has_key("noprocess"): process = 0 if options.has_key("last"): sql_queries.append(sql['last']) if options.has_key("collection"): cds_query['collection'] = options['collection'] else: cds_query['collection'] = "" if options.has_key("field"): cds_query['field'] = options['field'] else: cds_query['field'] = "" if options.has_key("pattern"): cds_query['pattern'] = options['pattern'] else: cds_query['pattern'] = "" ### sql commands to be executed during the script run ### ## check task status: task_status = task_read_status(task_id) if task_status != "WAITING": write_message("Error: The task #%d is %s. I expected WAITING." 
% (task_id, task_status)) return ## update task status: task_update_status("RUNNING") ## initialize signal handler: signal.signal(signal.SIGUSR1, task_sig_sleep) signal.signal(signal.SIGTERM, task_sig_stop) signal.signal(signal.SIGABRT, task_sig_suicide) signal.signal(signal.SIGCONT, task_sig_wakeup) signal.signal(signal.SIGINT, task_sig_unknown) ## run the task: bibreformat_task(sql, sql_queries, cds_query, process_format) ## we are done: task_update_status("DONE") return def usage(exitcode=1, msg=""): """Prints usage info.""" if msg: sys.stderr.write("Error: %s.\n" % msg) sys.stderr.write("Usage: %s [options]\n" % sys.argv[0]) sys.stderr.write(" -u, --user=USER \t\t User name to submit the task as, password needed.\n") sys.stderr.write(" -h, --help \t\t Print this help.\n") sys.stderr.write(" -V, --version \t\t Print version information.\n") sys.stderr.write(" -v, --verbose=LEVEL \t\t Verbose level (0=min,1=normal,9=max).\n") sys.stderr.write(" -s, --sleeptime=SLEEP\t\t Time after which to repeat tasks (no)\n") sys.stderr.write(" -t, --time=DATE \t\t Moment for the task to be active (now).\n") sys.stderr.write(" -a, --all \t\t All records\n") sys.stderr.write(" -c, --collection \t\t Select records by collection\n") sys.stderr.write(" -f, --field \t\t Select records by field.\n") sys.stderr.write(" -p, --pattern \t\t Select records by pattern.\n") sys.stderr.write(" -o, --format \t\t Specify output format to be (re-)created. (default HB)\n") sys.stderr.write(" -n, --noprocess \t\t Count records to be processed only (no processing done)\n") sys.stderr.write("\n") sys.stderr.write(" Example: bibreformat -n Show how many records are to be bibreformated.") sys.stderr.write("\n") sys.exit(exitcode) def main(): """Main function that analyzes command line input and calls whatever is appropriate. 
Useful for learning on how to write BibSched tasks.""" global options, sched_time, sleeptime ## parse command line: if len(sys.argv) == 2 and sys.argv[1].isdigit(): ## A - run the task task_id = int(sys.argv[1]) process_format = 0 task_run(task_id, process_format) else: ## B - submit the task process_format = 0 options = {} # will hold command-line options options["verbose"] = 1 try: opts, args = getopt.getopt(sys.argv[1:], "hVv:u:ac:f:s:p:lo:nt:wl", ["help", "version", "verbose=", "user=", "all", "collection=", "field=", "sleeptime=", "pattern=", "format=", "noprocess", "time=", "without", "last"]) except getopt.GetoptError, err: usage(1, err) clp = 0 # default parameters flag try: for opt in opts: if opt[0] in ["-h", "--help"]: usage(0) elif opt[0] in ["-V", "--version"]: print __revision__ sys.exit(0) elif opt[0] in [ "-u", "--user"]: options["user"] = opt[1] elif opt[0] in ["-v", "--verbose"]: options["verbose"] = int(opt[1]) elif opt[0] in ["-a", "--all"]: options["all"] = 1 options["without"] = 1 clp = 1 elif opt[0] in ["-c", "--collection"]: options["collection"] = opt[1] clp = 1 elif opt[0] in ["-n", "--noprocess"]: options["noprocess"] = 1 elif opt[0] in ["-f", "--field"]: options["field"] = opt[1] clp = 1 elif opt[0] in ["-p","--pattern"]: options["pattern"] = opt[1] clp = 1 elif opt[0] in ["-o","--format"]: options["format"] = opt[1] elif opt[0] in ["-s", "--sleeptime" ]: get_date(opt[1]) # see if it is a valid shift sleeptime = opt[1] elif opt[0] in [ "-t", "--time" ]: sched_time = get_date(opt[1]) if clp == 0: # default options["without"] = 1 options["last"] = 1 except StandardError, e: usage(e) task_submit() return ### okay, here we go: if __name__ == '__main__': main() diff --git a/modules/bibformat/doc/admin/guide.html.wml b/modules/bibformat/doc/admin/guide.html.wml index 9684e46ac..01aefe429 100644 --- a/modules/bibformat/doc/admin/guide.html.wml +++ b/modules/bibformat/doc/admin/guide.html.wml @@ -1,2603 +1,2624 @@ ## $Id$ ## This file is part of 
CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. #include "cdspage.wml" \ title="BibFormat Admin Guide" \ navtrail_previous_links="/admin/>_(Admin Area)_ > /admin/bibformat/>BibFormat Admin" \ navbar_name="admin" \ navbar_select="bibformat-admin-guide"

Version <: print generate_pretty_revision_date_string('$Id$'); :>

Please note that the old PHP BibFormat administration guide can be found further below.

Contents

  1. A Five Minutes Introduction to BibFormat
    1. How BibFormat works
    2. Short Tutorial
    3. Administer Through the Web Interface or Through the Configuration files
  2. Output Formats
    1. Add an Output Format
    2. Remove an Output Format
    3. Edit the Rules of an Output Format
    4. Edit the Attributes of an Output Format
    5. Check the Dependencies of an Output Format
    6. Check the Validity of an Output Format
  3. Format Templates
    1. Add a Format Template
    2. Remove a Format Template
    3. Edit the Code of a Format Template
      1. Basic Editing
      2. Use Format Elements
      3. Preview a Format Template
      4. Internationalization (i18n)
    4. Edit the Attributes of a Format Template
    5. Check the Dependencies of a Format Template
    6. Check the Validity of a Format Template
  4. Format Elements
    1. Add a Format Element
    2. Remove a Format Element
    3. Edit the Code of a Format Element
    4. Preview a Format Element
    5. Internationalization (i18n)
    6. Edit the Attributes of a Format Element
    7. Check the Dependencies of a Format Element
    8. Check the Validity of a Format Element
    9. Browse the Format Elements Documentation
  5. Knowledge Bases
    1. Add a Knowledge Base
    2. Remove a Knowledge Base
    3. Add a Mapping
    4. Remove a Mapping
    5. Edit a Mapping
    6. Edit the Attributes of a Knowledge Base
    7. Check the Dependencies of a Knowledge Base
  6. MARC Notation in Formats
  7. Migrating from Previous BibFormat
  8. Integrating BibFormat into Dreamweaver MX
  9. FAQ

A Five Minutes Introduction to BibFormat

How BibFormat Works

BibFormat is in charge of formatting the bibliographic records that are displayed to your users. It is called by the search engine when it has to format a record.

As you might need different kinds of formatting depending on the type of record, but potentially have a huge number of records in your database, you cannot specify for each of them how they should look. Instead, BibFormat uses a rule-based decision process to decide how to format a record.
The best way to understand how BibFormat works is to have a look at a typical workflow:

Step 1:
http://cdsweb.cern.ch/search?recid=946417&ln=en&of=hd When CDS Invenio has to display a record, it asks BibFormat to format the record with the given output format and language. For example here the requested output format is hd, which is a short code for "HTML Detailed". This means that somehow a user arrived on the page of the record and asked for a detailed view of the record.

Step 2:
1. Use Template [Picture HTML Detailed] if tag [980.a] is equal to [PICTURE] 2. Use Template [Thesis HTML detailed] if tag [980.a] is equal to [THESIS] 3. By default use [Default HTML Detailed] Beside is a screenshot of the "hd" or "HTML Detailed" output format. You can see that the output format does not specify how to format the record, but contains a set of rules which define which template must be used.
The rules are evaluated from top to bottom. Each rule defines a condition on a field of the record, and a format template to use to format the record if the condition matches. Let's say that the field 980.a of the record is equal to "Picture". Then the first rule matches, and the format template Picture HTML Detailed is used for formatting by BibFormat.
You can add, remove or edit output formats here

Step 3:
<h1 align="center"><BFE_MAIN_TITLE/></h1>
<p align="center">
<BFE_AUTHORS separator="; " link="yes"/><br/>
<BFE_DATE format="%d %B %Y"> .- <BFE_NB_PAGES suffix="p">
</p>
We see an extract of the Picture HTML Detailed format on the right, as it is shown in the template editor. As you can see it is mainly written using HTML. There are however some tags that are not part of standard HTML. The tags that start with <BFE_ are placeholders for the record values. For example <BFE_MAIN_TITLE/> tells BibFormat to write the title of the record. We call these tags "elements". Some elements have parameters. This is the case of the <BFE_AUTHORS> element, which can take separator and link as parameters. The value of separator will be used to separate authors' names and the link parameter tells whether links to authors' websites have to be created. All elements are described in the elements documentation.
You can add, remove or edit format templates here

Step 4:
def format(bfo, separator='; ', link='no'):
   """
   Returns the list of authors for the record as a single string

   @param bfo the object giving access to the record being formatted
   @param separator the string placed between two authors
   @param link if 'yes' wrap each author in an HTML link to an author search
   @return the authors joined by separator (empty string if there are none)
   """
   authors = bfo.fields("100__a")
   if link == 'yes':
      # NOTE: weburl and quote are not defined in this extract; they are
      # expected to be in scope in the real element file.
      authors = ['<a href="' + weburl + '/search?f=author&p='
                 + quote(x) + '">' + x + '</a>' for x in authors]
   # authors is a list, so it must be joined with the separator;
   # calling split() on it would raise AttributeError.
   return separator.join(authors)
A format element is written in Python. It acts as a bridge between the record in the database and the format template. Typically you will not have to write or read format elements, just call them from the templates. Each element outputs some text that is written in the template where it is called.
Developers can add new elements by creating a new file, naming it after the element, and writing a Python format function that takes as parameters the parameters of the element plus a special one, bfo. Regular Python code can be used, including imports of other modules.

In summary BibFormat is called by specifying a record and an output format, which relies on different templates to do the formatting, and which themselves rely on different format elements. Only developers need to modify the format elements layer.

Output Format
Template
Template
Format Element
Format Element
Format Element
Format Element

You should now understand the philosophy behind BibFormat.

Short Tutorial

Let's try to create our own format. This format will just print the title of a record.

First go to the main BibFormat admin page. Then click on the "Manage Output Formats" link. You will see the list of all output formats:

Output formats management page

This is where you can delete, create or check output formats. The menu at the top of the page lets you go to other administration pages.
Click on the "Add New Output Format" button at the bottom of the page. You can then fill in some attributes for the output format. Choose "title" as code, "Only Title" as name and "Prints only title" as description:

Screenshot of the Update Output Format Attributes page

Leave other fields blank, and click on the button "Update Output format Attributes".
You are then redirected to the rules editor. Notice the menu at the top which lets you close the editor, change the attributes again and check the output format. However, do not click on these links before saving your modifications to the rules!

Output format menu

As our format does not need to have a different behaviour depending on the record, we do not need to add new rules to the format. You just need to select a format template in the "By default use" list. However, we first have to create our special format template that only prints titles. So close the editor using the menu at the top of the page, and in the menu that just appeared instead, click on "Manage Format Templates". In a similar way to output formats, you see the list of format templates.

Format template management page

Click on the "Add New Format Template" button at the bottom of the page. As for the output format, fill in the attributes of the template with name "Title" and any relevant description.

update format template attributes

Click on the "Update Output Format Attributes" button. You are redirected to the template editor. The editor is divided into three parts. The upper left part contains the code of the template. The bottom part is a preview of the template. The part on the right side is a short reminder of the format elements you can use in your template. You can hide this documentation by clicking on "Hide Documentation".

Format template editor

The above screenshot shows the template code already filled in. It calls the BFE_TITLE element. If you do not know the name of the element you want to call, you can search for it using the embedded documentation search. You can try to add other elements into your template, or write some HTML formatting.

When you are satisfied with your template, click on the save button, close the editor and go back to the "Only Title" output format rules editor. There, select the template you have just created in the "By default use" list, save the output format, and you are done.

This tutorial does not cover all aspects of the management of formats (for example "Knowledge bases" or internationalization). It also does not show all the power of output formats, as the one we have created simply calls a template. However, you have seen enough to configure BibFormat through the web interface. Read the sections below to learn more about it.

Administer Through the Web Interface or Through the Configuration files

BibFormat can be administered in two ways. The first way is to use the provided web interface. It should be the most convenient way for most users. The web interface is simple to use and provides great tools to manage your formats. Its only limitation concerns the format elements, which cannot be modified using it (but the web interface provides dynamically generated documentation of your elements).
The other way to administer BibFormat is to directly modify the configuration files using your preferred text editor. This approach gives much power to advanced users, but requires access to the server's files. It also requires that users double-check their modifications, or use the web interface to ensure the validity and correctness of their formats.

In this manual we will show both ways. For each explanation we first show how to do it through the web interface, then how to do it by manipulating the configuration files. Non-power users can stop reading as soon as they encounter the text "For developers and adventurers only".

We generally recommend using the web interface, except for writing format elements.

Output Formats

As you potentially have a huge amount of bibliographic records, you cannot specify manually for each of them how it should be formatted. This is why you can define rules that will allow BibFormat to understand which kind of formatting to apply to a given record. You define this set of rules in what is called an "output format".

You can have different output formats, each with its own characteristics. For example, when multiple bibliographic records are displayed at the same time (as happens in search results), you certainly want only short versions to be shown to the user, while a detailed record is preferable when a single record is displayed, whatever the type of the record.
You might also want to let your users decide which kind of output they want. For example you might need to display HTML for regular web browsing, but would also give a BibTeX version of the bibliographic reference for direct inclusion in a LaTeX document.

To summarize, an output format groups similar kinds of formats, specifying which kind of formatting has to be done, but not how it has to be done.

Add an Output Format

To add a new output format, go to the Manage Output Formats page and click on the "Add New Output Format" button at the bottom of the page. The format has been created. You can then specify the attributes of the output format. See Edit the Attributes of an Output Format to learn more about it.

For developers and adventurers only:

Alternatively you can directly add a new output format file into the /etc/bibformat/outputs/ directory of your CDS Invenio installation, if you have access to the server's files. Use the format extension .bfo for your file.

You should also check that user www-data has read/write access to the file, if you want to be able to modify the rules through the web interface.

Remove an Output Format

To remove an output format, go to the Manage Output Formats page and click on the "Delete" button facing the output format you want to delete. If you cannot click on the button (the button is not enabled), this means that you do not have sufficient privileges to do so (the format is protected; contact the administrator of the system).

For developers and adventurers only:

You can directly remove an output format from the /etc/bibformat/outputs/ directory of your CDS Invenio installation. However you must make sure that it is removed from the tables format and formatname in the database, so that other modules know that it is no longer available.

Edit the Rules of an Output Format

When you create a new output format, you can at first only specify the default template, that is the one which is used when all rules fail. In the case of a basic output format, this is enough. You can however add other rules, by clicking on the "Add New Rule" button.
Once you have added a rule, you can fill it with a condition, and a template that should be used if the condition is true. For example the rule

Rule: Use template [Picture HTML Detailed] if field [980.a] is equal to [PICTURE]

will use template named "Picture HTML Detailed" if the field 980.a of the record to format is equal to "Picture". Note that text "PICTURE" will match any letter case like "picture" or "Picture". Leading and trailing spaces are ignored too (" Picture " will match "PICTURE").
Tips: you can use a regular expression as text. For example "PICT.*" will match "pictures" and "PICTURE".

Reorder rules using arrows
The above configuration will use format template "Default HTML Detailed" if all above rules fail (in that case if field 980.a is different from "PICTURE"). If you have more rules, you decide in which order the conditions are evaluated. You can reorder rules by clicking on the small arrows on the left of the rules.

Note that when you are migrating your output formats from the old PHP BibFormat, you might not have translated all the formats to which your output formats refer. In that case you should use the "defined in old BibFormat" option in the format templates menu, to make BibFormat understand that a match for this rule must trigger a call to the Behaviour of the old BibFormat. See the section on Run old and new formats side by side for more details on this.

For developers and adventurers only:

To write an output format, use the following syntax:
First you define which field code you put as the condition for the rule. You suffix it with a colon. Then, on the next lines, define the values of the condition, followed by --- and then the filename of the template to use:

   tag 980.a:
   PICTURE --- PICTURE_HTML_BRIEF.bft
   PREPRINT --- PREPRINT_HTML_BRIEF.bft
   PUBLICATION --- PUBLICATION_HTML_BRIEF.bft
 

This means that if value of field 980.a is equal to PICTURE, then we will use format template PICTURE_HTML_BRIEF.bft. Note that you must use the filename of the template, not the name. Also note that spaces at the end or beginning are not considered. On the following lines, you can either put other conditions on tag 980.a, or add another tag on which you want to put conditions.

At the end you can add a default condition:

    default: PREPRINT_HTML_BRIEF.bft
 

which means that if no condition is matched, a format suitable for Preprints will be used to format the current record.

The output format file could then look like this:

   tag 980.a:
   PICTURE --- PICTURE_HTML_BRIEF.bft
   PREPRINT --- PREPRINT_HTML_BRIEF.bft
   PUBLICATION --- PUBLICATION_HTML_BRIEF.bft
 
   tag 8560.f:
   .*@cern.ch --- SPECIAL_MEMBER_FORMATTING.bft
 
   default: PREPRINT_HTML_BRIEF.bft
 

You can add as many rules as you want. Keep in mind that they are read in the order they are defined, and that only the first rule that matches will be used. Notice the condition on tag 8560.f: it uses a regular expression to match any email address that ends with @cern.ch (the regular expression must be understandable by Python).

Edit the Attributes of an Output Format

An output format has the following attributes:

Please read this information regarding output format codes: There are some reserved codes that you should not use, or at least be aware of when choosing a code for your output format. The table below summarizes these special words:

CodePurpose
HBUsed for displaying list of results of a search.
HDUsed when no format is specified when viewing a record.
HMUsed for Marc output. The format is special in the sense that it filters fields to display according to the 'ot' GET parameter of the HTTP request.
Starting with letter 't'Used for displaying the value of the field specified by the 'ot' GET parameter of the HTTP request.
Starting with 3 digitsUsed for displaying the value of the field specified by the digits.

For developers and adventurers only:

Except for the code, output format attributes cannot be changed in the output format file. These attributes are saved in the database. As for the code, it is the name of the output format file, without its .bfo extension. If you change this name, do not forget to propagate the modification to the database.

Check the Dependencies of an Output Format

To check the dependencies of an output format on format templates, format elements and tags, go to the Manage Output Formats page, click on the output format you want to check, and then in the menu click on "Check Dependencies".

Check Dependencies menu

The next page shows you:

Note that some Marc tags might be omitted.

Check the Validity of an Output Format

To check the validity of an output format, simply go to the Manage Output Formats page, and look at the column 'status' for the output format you want to check. If message "Ok" is there, then no problem was found with the output format. If message 'Not Ok' is in the column, click on it to see the problems that have been found for the output format.

Format Templates

A format template defines how a record should be formatted. For example it specifies which fields of the record are to be displayed, in which order and with which visual attributes. Basically the format template is written in HTML, so that it is easy for anyone to edit it.

Add a Format Template

To add a new format template, go to the Manage Format Templates page and click on the "Add New Format Template" button at the bottom of the page. The format has been created. You can then specify the attributes of the format template, or ask to make a copy of an existing format. See Edit the Attributes of a Format Template to learn more about editing the attributes.

For developers and adventurers only:

Alternatively you can directly add a new format template file into the /etc/bibformat/format_templates/ directory of your CDS Invenio installation, if you have access to the server's files. Use the format extension .bft for your file.

You should also check that user www-data has read/write access to the file, if you want to be able to modify the code and the attributes of the template through the web interface.

Remove a Format Template

To remove a format template, go to the Manage Format Templates page and click on the "Delete" button facing the format template you want to delete. If you cannot click on the button (the button is not enabled), this means that you do not have sufficient privileges to do so (the format is protected; contact the administrator of the system).

For developers and adventurers only:

You can directly remove the format template from the /etc/bibformat/format_templates/ directory of your CDS Invenio installation.

Edit the Code of a Format Template

You can change the formatting of records by modifying the code of a template.

To edit the code of a format template go to the Manage Format Templates page. Click on the format template you want to edit to load the template editor.

The format template editor contains three panels. The upper left panel is the code editor. This is where you write the code that specifies the formatting of a template. The right-most panel contains short documentation on the "bricks" you can use in your format template code. The panel at the bottom of the page allows you to preview the template.

Template Editor Page

The following sections explain how to write the code that specifies the formatting.

Basic Editing

The first thing you have to know before editing the code is that everything you write in the code editor is printed as such by BibFormat. Well almost everything (as you will discover later).

For example if you write "My Text", then for every record the output will be "My Text". Now let's say you write "<b>My Text</b>": the output will still be "<b>My Text</b>", but as we display in a web browser, it will look like "My Text" (The browser interprets the text inside tags <b></b> as "bold". Also note that the look may depend on the CSS style of your page).

Basically it means that you can write HTML to do the formatting. If you are not experienced with HTML you can use an HTML editor to create your layout, and then copy-paste the HTML code into the template.

Do not forget to save your work by clicking on the save button before you leave the editor!

For developers and adventurers only:

You can edit the code of a template using exactly the same syntax as in the web interface. The code of the template is in the template file located in the /etc/bibformat/format_templates/ directory of your CDS Invenio installation. You just have to take care of the attributes of the template, which are saved in the same file as the code. See Edit the Attributes of a Format Template to learn more about it.

Use Format Elements

To add a dynamic behaviour to your format templates, that is display for example a different title for each record or a different background color depending on the type of record, you can use the format elements.

Format elements are the smart bricks you can copy-paste in your code to get the attributes of template that change depending on the record. A format element looks like a regular HTML tag.

For example, to print the title of a record, you can write <BFE_TITLE /> in your template code where you want to display the title.

Format elements can take values as parameters. This allows you to customize the behaviour of an element. For example you can write <BFE_TITLE prefix="Title: " />, and BibFormat will take care of printing the title for you, with prefix "Title: ". The difference between Title: <BFE_TITLE /> and <BFE_TITLE prefix="Title: " /> is that the first option will always write "Title: " while the second one will only print "Title: " if there exists a title for the record in the database. Of course there are chances that there is always a title for each record, but this can be useful for less common fields.

Some parameters are available for all elements. This is the case for the following ones:

Some parameters are specific to elements. To get information on all available format elements you can read the Format Elements Documentation, which is generated dynamically for all existing elements. It will show you what each element does and what parameters it can take.

While format elements look like HTML tags, they differ in the following ways from traditional ones:

Tip: you can use the special element <BFE_FIELD tag="" /> to print the value of any field of a record in your templates. This practice is however not recommended because it would require revising all format templates if you changed the meaning of the MARC code schema.

Preview a Format Template

To preview a format template go to the Manage Format Templates page and click on the format template you want to preview to open the template editor. The editor contains a preview panel at the bottom of the page.

Preview Panel

Simply click on the "Reload Preview" button to preview the template (you do not need to save the code before previewing).
Use the "Language" menu to preview the template in a given language.

You can fill in the "Search Pattern" field to preview a specific record. The search pattern uses exactly the same syntax as the one used in the web interface. The only difference with the regular search engine is that only the first matching record is shown.

For developers and adventurers only:

If you do not want to use the web interface to edit the templates but still would like to get previews, you can open the preview frame of any format in a new window/tab. In this mode you get a preview of the template (if it is placed in the /etc/bibformat/format_templates/ directory of your CDS Invenio installation). The parameters of the preview are specified in the url:

Internationalization (i18n)

You can add translations to your format templates. To do so enclose the text you want to localize with tags corresponding to the two letters of the language. For example if we want to localize "title", write <en>Title</en>. Repeat this for each language in which you want to make "title" available: <en>Title</en><fr>Titre</fr><de>Titel</de>. Finally enclose everything with <lang> </lang> tags: <lang><en>Title</en><fr>Titre</fr><de>Titel</de></lang>

For each <lang> group only the text in the user's language is displayed. If user's language is not available in the <lang> group, your default CDS Invenio language is used.

Edit the Attributes of a Format Template

To edit the attributes of a format template go to the Manage Format Templates page, click on the format template you want to edit, and then in the menu click on "Modify Template Attributes".

A format template contains two attributes:

Note that changing these parameters has no impact on the formatting. Their purpose is only to document the template.

If the name you have chosen already exists for another template, your name will be suffixed with an integer so that the name is unique.

You should also be aware that if you change the name of a format template, all output formats that were linking to this template will be changed to match the new name.

For developers and adventurers only:

You can change the attributes of a template by editing its file in the /etc/bibformat/format_templates/ directory of your CDS Invenio installation. The attributes must be enclosed with tags <name> </name> and <description> </description> and should ideally be placed at the beginning of the file.

Also note that the admin web interface tries to keep the name of the template in sync with the filename of the template. If the name is changed through the web interface, the filename of the template is changed, and all output formats that use this template are updated. You have to update output formats manually if you change the filename of the template without the web interface.

Check the Dependencies of a Format Template

To check the dependencies of a format template go to the Manage Format Template page, click on the format template you want to check, and then in the menu click on "Check Dependencies".

Check Dependencies menu

The next page shows you:

Note that some Marc tags might be omitted.

Check the Validity of a Format Template

To check the validity of a format template, simply go to the Manage Format Templates page, and look at the column 'status' for the format template you want to check. If message "Ok" is there, then no problem was found with the template. If message 'Not Ok' is in the column, click on it to see the problems that have been found for the template.

Format Elements

Format elements are the bricks used in format templates to provide dynamic content to the formatting process. Their purpose is to allow non computer literate persons to easily integrate data from the records in the database into their templates.

Format elements are typically written in Python (there is an exception to that point which is discussed in Add a Format Element). This brings great flexibility and power to the formatting process. This however restricts the creation of format elements to developers.

Add a Format Element

The most typical way of adding a format element is to drop a .py file in the lib/python/invenio/bibformat_elements directory of your CDS Invenio installation. See Edit the Code of a Format Element to learn how to implement an element.

The simplest way to add a format element is to add an entry in the "Logical Fields" management interface of the BibIndex module. When BibFormat cannot find the Python format element corresponding to a given name, it looks into this table for the name and prints the value of the field declared for this name. This lightweight approach is straightforward but does not allow complex handling of the data (it is limited to printing the value of the field, or the values of the fields if multiple fields are declared under the same label).

Remove a Format Element

To remove a Python format element simply remove the corresponding file from the lib/python/invenio/bibformat_elements directory of your CDS Invenio installation.

To remove a format element declared in the "Logical Fields" management interface of the BibIndex module simply remove the entry from the table.

Edit the Code of a Format Element

This section only applies to Python format elements. Basic format elements declared in "Logical Fields" have non configurable behaviour.

A format element file is like any regular Python program. It has to implement a format function, which returns a string and takes at least bfo as first parameter (but can take as many others as needed).

Here is for example the code of the "bfe_title.py" element:

 def format(bfo, separator=" "):
     """
     Prints the title of a record.
 
     @param separator separator between the different titles
     """
     titles = []
    
     title = bfo.field('245.a')
     title_remainder = bfo.field('245.b')
 
     titles.append( title + title_remainder )
 
     title = bfo.field('0248_a')
     if len(title) > 0:
         titles.append( title )
 
     title = bfo.field('246.a')
     if len(title) > 0:
         titles.append( title )
 
     title = bfo.field('246_1.a')
     if len(title) > 0:
         titles.append( title )
 
     return separator.join(titles)
 
In format templates this element can be called like a function, using HTML syntax:
<BFE_TITLE separator="; "/>
Notice that the call uses (almost) the filename of your element. To find out which element to use, BibFormat tries different filenames until the element is found: it tries to
  1. ignore the letter case
  2. replace underscores with spaces
  3. remove the BFE_ from the name
This means that even if the filename of your element is "my element.py", BibFormat can resolve the call <BFE_MY_ELEMENT /> in a format template. This also means that you must take care not to have two format element filenames that only differ in terms of the above parameters.

The string returned by the format function corresponds to the value that is printed instead of the format element name in the format template.

The bfo object taken as parameter by format stands for BibFormatObject: it is an object that represents the context in which the formatting takes place. For example it allows to retrieve the value of a given field for the record that is being formatted, or the language of the user. We see the details of the BibFormatObject further below.

The format function of an element can take other parameters, as well as default values for these parameters. The idea is that these parameters are accessible from the format template when calling the elements, and allow to parametrize the behaviour of the format element.

It is very important to document your element: this makes it possible to generate documentation for the elements, accessible to people writing format templates. It is the only way for them to know what your element does. The key points are:

Typically you will need to get access to some fields of a record to display as output. There are two ways to do this: you can access the bfo object given as parameter and use the provided (basic) accessors, or import a dedicated module and use its advanced functionalities.

Method 1: Use accessors of bfo:
bfo is an instance of the BibFormatObject class. The following methods are available:

Method 2: Use module BibRecord:
BibRecord is a module that provides advanced functionalities regarding access to the fields of a record. bfo.get_record() returns a structure that can be understood by BibRecord's functions. Therefore you can import the module's functions to get access to the fields you want.

Internationalization (i18n)

You can follow the standard internationalization procedure in use across CDS Invenio sources. For example the following code will get you the translation for "Welcome" (assuming "Welcome" has been translated):

 from invenio.messages import gettext_set_language
 
 ln = bfo.ln
 _ = gettext_set_language(ln)
 
 translated_welcome =  _("Welcome")
 

Notice the access to bfo.ln to get access to the current language of the user. For simpler translations or behaviour depending on the language you can simply check the value bfo.ln to return your custom text.

Edit the Attributes of a Format Element

A format element has mainly four kinds of attributes:

Check the Dependencies of a Format Element

There are two ways to check the dependencies of a format element. The simplest way is to go to the format elements documentation and click on "Dependencies of this element" for the element you want to check.

The second method to check the dependencies of an element is through regular unix tools: for example $ grep -r -i 'bfe_your_element_name' . inside the format templates directory will tell you which templates call your element.

Check the Validity of a Format Element

There are two ways to check the validity of an element. The simplest one is to go to the format elements documentation and click on "Correctness of this element" for the element you want to check.

The second method to check the validity of an element is through regular Python methods: you can for example import the element in the interactive interpreter and feed it with test parameters. Notice that you will need to build a BibFormatObject instance to pass as bfo parameter to the format function of your element.

Browse the Format Elements Documentation

Go to the format elements documentation. There is a summary of all available format elements at the top of the page. You can click on an element to go to its detailed description in the second part of the page.

Each detailed documentation shows you:

Preview a Format Element

You can play with a format element parameters and see the result of the element directly in the format elements documentation: for each element, under the section "See also", click on "Test this element". You are redirected to a page where you can enter a value for the parameters. A description is associated with each parameter as well as an indication of the default value of the parameter if you do not provide a custom value. Click on the "Test!" button to see the result of the element with your parameters.

Knowledge Bases

Knowledge bases are a way to define easily extendable repositories of mappings. Their uses are various, but their main purpose is to get, given a value, the normalized version of this value. For example you may use a knowledge base to hold a list of all ways to abbreviate the name of a journal, and map these abbreviations to the full journal name. This would be useful to get a normalized journal name across all of your records.

The knowledge base itself offers no method to do this normalization. It is limited to the archiving of this knowledge. To benefit from the normalization you need to use a format element which is knowledge-base-aware. The element will look by itself into the knowledge base to format a record. In that way you can extend the formatting capabilities of this element without having to modify it.

Add a Knowledge Base

To add a knowledge base go to the Manage Knowledge Bases administration page. At the bottom of the page click on the "Add New Knowledge Base" button. The knowledge base has been created and you are asked to fill in its attribute. See Edit the Attributes of a Knowledge Base to learn more about the attributes of knowledge bases.

Remove a Knowledge Base

To remove a knowledge base go to the Manage Knowledge Bases administration page. Click on the "Delete" button facing the knowledge base you want to remove and confirm. The knowledge base and all the mappings it includes are removed.

Add a Mapping

Go to the Manage Knowledge Bases administration page and click on the knowledge base for which you want to add a mapping. Fill in the form of the "Add New Mapping" section on the left of the page with the new mapping, and click on "Add New Mapping". The mapping has been created. Alternatively you can create the mapping without its attributes, and fill them afterward (See Edit a Mapping).

Remove a Mapping

Go to the Manage Knowledge Bases administration page and click on the knowledge base for which you want to remove a mapping. Click on the "Delete" button facing the mapping you want to delete.

Edit a Mapping

Go to the Manage Knowledge Bases administration page and click on the knowledge base for which you want to edit a mapping. Locate the mapping in the list. You can click on the column headers to order the list by Map From or by Map To to help you find it. Once you have edited the mapping click on the corresponding "Save" button.

Edit the Attributes of a Knowledge Base

Go to the Manage Knowledge Bases administration page and click on the knowledge base you want to edit. In the top menu, click on "Knowledge Base Attributes". You can then give your knowledge base a name and a description. Finally click on the "Update Base Attributes" button.

Check the Dependencies of a Knowledge Base

To check the dependencies of a knowledge base go to the Manage Knowledge Bases page, click on the knowledge base you want to check, and then in the menu click on "Knowledge Base Dependencies".

The next page shows you the list of format elements that use this knowledge base.

Note that some format elements might be omitted.

MARC Notation in Formats

The notation for accessing fields of a record is quite flexible. You can use a syntax strict regarding MARC 21, but also a shortcut syntax, or a syntax that can have a special meaning.

The MARC syntax is the following one: tag[indicator1][indicator2] [$ subfield] where tag is 3 digits, indicator1 and indicator2 are 1 character each, and subfield is 1 letter.

For example to get access to an abstract you can use the MARC notation 520 $a. You can use this syntax in BibFormat. However you can also:

Migrating from Previous BibFormat

The new Python BibFormat formats are not backward compatible with the previous formats. New concepts and capabilities have been introduced and some have been dropped. If you have not modified the "Formats" or modified only a little bit the "Behaviours" (or modified "Knowledge Bases"), then the transition will be painless and automatic. Otherwise you will have to manually rewrite some of the formats. This should however not be a big problem. Firstly because the CDS Invenio installation will provide both versions of BibFormat for some time. Secondly because both BibFormat versions can run side by side, so that you can migrate your formats while your server still works with the old formats. Thirdly because we provide a migration kit that can help you go through this process. Finally because the migration is not so difficult, and because it will be much easier for you to customize how BibFormat formats your bibliographic data.

The first thing you should do is to read the Five Minutes Introduction to BibFormat to understand how the new BibFormat works. We also assume that you are familiar with the concepts of the old BibFormat. As the new formats separate the presentation from the business logic (i.e. the bindings to the database), it is not possible to automatically handle the translation. This is why you should at least be able to read and understand the formats that you want to migrate.

Differences between old and new BibFormat

The most noticeable differences are:

a) "Behaviours" have been renamed "Output formats".
b) "Formats" have been renamed "Format templates". They are now written in HTML.
c) "User defined functions" have been dropped.
d) "Extraction rules" have been dropped.
e) "Link rules" have been dropped.
f) "File formats" have been dropped.
g) "Format elements" have been introduced. They are written in Python, and can simulate c), d) and e).
h) Formats can be managed through web interface or through human-readable config files.
i) Introduction of tools like validator and dependencies checker.
j) Better support for multi-language formatting.

Some of the advantages are:

+ Management of formats is much clearer and easier (fewer concepts, more tools).
+ Writing formats is easier to learn: fewer concepts to learn, redesigned work-flow, use of existing well known and well documented languages.
+ Editing formats is easier: You can use your preferred HTML editor such as Emacs, Dreamweaver or Frontpage to modify templates, or any text editor for output formats and format elements. You can also use the simplified web administration interface.
+ Faster and more powerful templating system.
+ Separation of business logic (output formats, format elements) and presentation layer (format templates). This makes the management of formats simpler.

The disadvantages are:

- No backward compatibility with old formats.
- Stricter separation of business logic and presentation layer:
no more use of statements such as if(), forall() inside templates, and this requires more work to put logic inside format elements.

Migrating behaviours to output formats

Behaviours were previously stored in the database and required the use of the evaluation language to provide the logic that chooses which format to use for a record. They also let you enrich records with some custom data. Now their use has been simplified and restricted to equivalence tests on the value of a field of the record to define the format template to use.

For example, the following behaviour:

CONDITIONS
0 $980.a="PICTURE"
Action (0) "<record>
 <controlfield tag=\"001\">" $001 "</controlfield>
 <datafield tag=\"FMT\" ind1=\"\" ind2=\"\"> 
 <subfield code=\"f\">hb</subfield> 
 <subfield code=\"g\">" 
xml_text(format("PICTURE_HTML_BRIEF"))
" </subfield> 
 </datafield>
</record>"
 
100 ""=""
Action (0) "<record>
 <controlfield tag=\"001\">" $001 "</controlfield>
 <datafield tag=\"FMT\" ind1=\"\" ind2=\"\"> 
 <subfield code=\"f\">hb</subfield> 
 <subfield code=\"g\">" 
xml_text(format("DEFAULT_HTML_BRIEF"))
" </subfield> 
 </datafield>
</record>"
 

translates to the following output format (in textual configuration file):

tag 980.a:
PICTURE --- Picture_HTML_brief.bft
default: Default_HTML_brief.bft

or visual representation through web interface:
Image representation of HB output format

We suggest that you use the migration kit to produce initial output formats from your behaviours, but that you go through the created .bfo files in the /etc/bibformat/output_formats/ directory of your CDS Invenio installation to check that they correspond to your behaviours.

Migrating formats to format templates and format elements

The migration of formats is the most difficult part of the migration. You will need to separate the presentation code (HTML) from the business code (iterations, tests and calls to the database). Here are some tips on how you can do this:

We recommend that you do not use the migration kit for this part: it can help you create the initial files, but will never be able to provide a working implementation of the formats.

Migrating Knowledge Bases

We recommend that you use the migration kit to migrate your knowledge bases. It should have no problem migrating this part of your configuration.

Migrating UDFs and Link rules

User Defined Functions and Link rules have been dropped in the new BibFormat. These concepts are no longer needed, as they can be fully implemented in the format elements. For example the AUTHOR_SEARCH link rule can directly be implemented in the Authors.bfe element.

As for the UDFs, most of them are directly built-in functions of Python. Whenever a special function has to be implemented, it can be defined in a regular Python file and used in any element.

The Migration Kit

The migration kit is available from the main BibFormat admin webpage or directly here. The migration kit has 3 steps, each migrating some part of your configuration. Just click on the links to migrate each part and get the status of the migration.

You should note that each migration will create new files or entries in the database, such that you will certainly want to click only once on each step (otherwise you will get duplicates).

The migration kit can:
a) Effortlessly migrate your behaviours, unless they include complex logic, which usually they don't.
b) Help you migrate formats to format templates and format elements.
c) Effortlessly migrate your knowledge bases.

Point b) is the most difficult to achieve: previous formats did mix business logic and code for the presentation, and could use PHP functions. The new BibFormat separates business logic and presentation, and does not support PHP. The transition kit will try to move business logic to the format elements, and the presentation to the format templates. These files will be created for you and will include the original code and, if possible, a proposed Python translation. We recommend that you do not use the transition kit to translate formats, especially if you have not modified default formats, or only modified default formats in some limited places. You will get cleaner code if you write format elements and format templates yourself.

Run old and new formats side by side

You might want to migrate your formats over a long period of time, making new formats available to your users once they have been migrated, while old formats are still being used if they have not been translated. BibFormat will do this almost automatically. This section tells you what you should be aware of if you want this to work seamlessly.

When BibFormat has to format a record with a given output format code, it first tries to find a corresponding output format in the (new) output formats directory. If the output format cannot be found, it hands the formatting process over to the old BibFormat, which will look for a behaviour with a name corresponding to the code. This leads to the first rule you should follow:

For each of the Behaviours you want to migrate, you should have an Output Format with a code corresponding to the name of the Behaviour.

The second (and last) rule is as simple as the first one. Imagine you have a Behaviour "HD" that you want to migrate to Output Format "HD". Let's say that "HD" links to 'picture_HTML_detailed' format if field 980$a is equal to "Picture", and links to 'default_HTML_detailed' in all other cases, but that 'picture_HTML_detailed' has not been migrated to a new format template. Then the second rule says:

Output Formats should have the same conditions on tags as Behaviours, even if format for that condition has not been migrated.

In our example, if you open the "HD" output format in the web interface, you can add a rule that works on condition "If 980$a is PICTURE" and set the template to be used to "defined in old BibFormat" in the template menu. This looks strange, but it is the only way to tell BibFormat that it should consider this condition and not go to the default rule and use the default template.

For developers and adventurers only:

If you are to write Output Formats without the web interface, you should use the name migration_in_progress for each template which has not been migrated. The above example would therefore become:
tag 980.a :
PICTURE --- migration_in_progress
default: Default_HTML_detailed.bft

Integrating BibFormat into Dreamweaver MX

BibFormat templates have been designed to be editable in custom HTML editors. We propose in this section a way to extend one particular editor, Dreamweaver.

Make Dreamweaver Recognize Format Elements in Layout View

To make Dreamweaver understand the format elements and display an icon for each of them in the layout editor, you must edit a Dreamweaver configuration file named Tags.xml located inside /Configuration/ThirdPartyTags directory of your Dreamweaver installation folder. At the end of this file, copy-paste the following lines:

   <!-- BibFormat (CDS Invenio) -->
   <tagspec tag_name="BIBFORMAT" start_string="<BFE_" end_string="/>" parse_attributes="false" detect_in_attribute="true" icon="bibformat.gif" icon_width="25" icon_height="16"></tagspec >
   <tagspec tag_name="BIBFORMAT" start_string="<bfe_" end_string="/>" parse_attributes="false" detect_in_attribute="true" icon="bibformat.gif" icon_width="25" icon_height="16"></tagspec >
   
Also copy this icon bibformat.gif in the same directory as Tags.xml (right-click on icon, or ctrl-click on one-button mouse, and "Save Image As..."). Make sure the downloaded image is named "bibformat.gif".

Note that Dreamweaver might not recognize Format Elements when complex formatting is involved due to these elements.

Add a Format Elements Floating Panel

You can add a floating panel that will allow you to insert Format Elements in your document and read the documentation of all available Format Elements.

The first step is to declare in which menu of Dreamweaver this floating panel is going to be available. To do so, edit file "Menu.xml" located inside /Configuration/Menus of your Dreamweaver application directory and copy-paste the following line in the menu you want (typically inside tag 'menu' with attribute id='DWMenu_Window_Others'):

    <menuitem name="BibFormat Elements" enabled="true" command="dw.toggleFloater('BibFormat_floater.html')" checked="dw.getFloaterVisibility('BibFormat_floater.html')" />
   

Once this is done, you can download the floating palette (if file opens in your browser instead of downloading, right-click on icon, or ctrl-click on one-button mouse, and "Save Target As...") and move the downloaded file "BibFormat_floater.html" (do not rename it) into /Configuration/Floaters directory of your Dreamweaver application folder.

To use the BibFormat floating panel, open Dreamweaver, and choose Window > Others > BibFormat Elements.

Whenever a new version of the palette is available, you can skip the editing of file "Menu.xml" and just replace the old "BibFormat_floater" file with the new one.

FAQ

Why do we need output formats? Wouldn't format templates be sufficient?

As you potentially have a lot of records, it is not conceivable to specify for each of them which format template they should use. This is why this rule-based decision layer has been introduced.

How can I protect a format?

As a web user, you cannot protect a format. If you are administrator of the system and have access to the format files, you can simply use the permission rights of your system, as BibFormat is aware of it.

Why cannot I edit/delete a format?

The format file has certainly been protected by the administrator of the server. You must ask the administrator to unprotect the file if you want to edit it.

How can I add a format element from the web interface?

Format elements cannot be added, removed or edited through the web interface. This limitation has been introduced to limit the security risks caused by the upload of Pythonic files on the server. The only possibility to add a basic format element from the web interface is to add an entry in the "Logical Fields" management interface of the BibIndex module (see Add a Format Element).

Why are some Marc codes omitted in the "Check Dependencies" pages?

When you check the dependencies of a format, the page reminds you that some use of Marc codes might not be indicated. This is because it is not possible (or at least not trivial) to guess that the call to field(str(5+4)+"80"+".a") is equal to a call to field("980.a"). You should then not completely rely on this indication.

How are deleted records displayed?

By default, CDS Invenio displays a standard "The record has been deleted." message for all output formats with a 'text/html' content type. Your output format, format templates and format elements are bypassed by the engine. However, for more advanced output formats, CDS Invenio goes through the regular formatting process and lets your formats do the job. This allows you to customize how a record should be displayed once it has been deleted.

Why are some format elements omitted in the "Knowledge Base Dependencies" page?

When you check the dependencies of a knowledge base, the page reminds you that format elements using this knowledge base might not be indicated. This is because it is not possible (or at least not trivial) to guess that the call to kb(e.upper()+"journal"+"s") in a format element is equal to a call to kb("Ejournals"). You should then not completely rely on this indication.

Why are some format elements defined in field table omitted in the format element documentation?

Some format elements defined in the "Logical Fields" management interface of the BibIndex module (the basic format elements) are not shown in the format elements documentation pages. We do not show such an element if its name starts with a number. This is to reduce the number of elements shown in the documentation as the logical fields table contains a lot of not so useful fields to be used in templates.

How can I escape special HTML/XML chars like '<', '>' and '&'? What is the default behaviour of BibFormat?

-

By default, BibFormat escapes no value at all. However, when calling a format element -from a format template, you can set the special parameter escape of your element to '1' to -make BibFormat understand that it has to escape values of the element.
+

Format templates:

+

By default, BibFormat escapes all values returned by format elements. As a format template designer, +you can assume in almost all cases that the values you get from a format element will be escaped for you. For special cases, you can set the parameter escape of the element to '0' when calling it, to +make BibFormat understand that it must not escape the values of the element, or to '1' to force the escaping.
For example <bfe_abstract /> will return:
[...]We find that for spatially-flat cosmologies, background lensing
clusters with reasonable mass-to-light ratios lying in the
-redshift range 0<1 are strongly excluded, [...]

-while <bfe_abstract escape="1"/> will return:
-[...]We find that for spatially-flat cosmologies, background lensing
-clusters with reasonable mass-to-light ratios lying in the
-redshift range 0&lt;1 are strongly excluded, [...]

-In most cases, you will -want to set escape to '1', but you have to make sure that the author of the element has not already -escaped values for you. This might be the case when the element returns some HTML.
-Please note that values given in sepcial parameters prefix, suffix, default and nbMax are never escaped, whatever the value of escape is.

+redshift range 0&lt;1 are strongly excluded, [...]

+while <bfe_abstract escape="0"/> will return:
+[...]We find that for spatially-flat cosmologies, background lensing
+clusters with reasonable mass-to-light ratios lying in the
+redshift range 0<1 are strongly excluded, [...]

+In most cases, you will not set escape to 1, nor 0, but just let the developer of the +element take care of that for you.
+Please note that values given in special parameters prefix, suffix, default and nbMax are never escaped, whatever the value of escape is (but other parameters will). You have to take care of that in your format template, as well as all other values that are not returned by the format elements.

+

If you are a format elements developer, please see below.

+ +

Format elements:

+

In most cases, that is cases where your element does not return HTML output, you do not have to take +any particular action in order to escape values that you output: the BibFormat engine will take care of escaping +the returned value for you. In cases where you want to return text that should not be escaped (for example +when you return HTML links), you can make the formatting engine know that it should not escape your value. This is done +by implementing the escape_values(bfo) function in your element, that will return (int) 0 when escape should +not be done (or 1 when escaping should be done): +

def escape_values(bfo):
+    """
+    Called by BibFormat in order to check if output of this element
+    should be escaped.
+    """
+    return 0
+
Note that the function is given a bfo object as parameter, such that you can do additional testing +if your element should really return 1 or 0 (for very special cases).
+Also note that the behavior defined by the escape_values() function will be overridden by the +escape parameter used in the format template if it is specified. +

+

If you are a format templates designer, please see above.

Old PHP BibFormat Administration Guide

Contents

1.
Overview
2. Configuring BibFormat
3. Running BibFormat
       3.1 From Web interface
       3.2 From the command-line interface
4. Detailed Configuration Manual
       4.1 About BibFormat
       4.2 How it works?
       4.3 A first look at the web configuration interface
       4.4 Mapping the input (OAI Extraction Rules)
       4.5 Defining output types: Behaviors
       4.6 Formats
       4.7 Knowledge bases (KBs)
       4.8 User Defined Functions (UDFs)
       4.9 Defining links
             4.9.1 EXTERNAL link conditions
             4.9.2 INTERNAL link conditions
             4.9.3 Example
       4.10 User management
       4.11 Evaluation Language Reference

1. Overview

The BibFormat admin interface enables you to specify how the bibliographic data is presented to the end user in the search interface and search results pages. For example, you may specify that titles should be printed in bold font, the abstract in small italic, etc. Moreover, the BibFormat is not only a simple bibliographic data output formatter, but also an automated link constructor. For example, from the information on journal name and pages, it may automatically create links to publisher's site based on some configuration rules.

2. Configuring BibFormat

By default, a simple HTML format based on the most common fields (title, author, abstract, keywords, fulltext link, etc) is defined. You certainly want to define your own output formats in case you have a specific metadata structure.

Here is a short guide of what you can configure:

Behaviours
Define one or more output BibFormat behaviours. These are then passed as parameters to the BibFormat modules while executing formatting.
Example: You can tell BibFormat that it has to enrich the incoming metadata file by the created format, or that it only has to print the format out.
Extraction Rules
Define how the metadata tags from input are mapped into internal BibFormat variable names. The variable names can afterwards be used in formatting and linking rules.
Example: You can tell that 100 $a field should be mapped into $100.a internal variable that you could use later.
Link Rules
Define rules for automated creation of URI links from mapped internal variables.
Example: You can tell a rule how to create a link to People database out of the $100.a internal variable representing author's name. (The $100.a variable was mapped in the previous step, see the Extraction Rules.)
File Formats
Define file format types based on file extensions. This will be used when proposing various fulltext services.
Example: You can tell that *.pdf files will be treated as PDF files.
User Defined Functions (UDFs)
Define your own functions that you can reuse when creating your own output formats. This enables you to do complex formatting without ever touching the BibFormat core code.
Example: You can define a function how to match and extract email addresses out of a text file.
Formats
Define the output formats, i.e. how to create the output out of internal BibFormat variables that were extracted in a previous step. This is the functionality you would want to configure most of the time. It may reuse formats, user defined functions, knowledge bases, etc.
Example: You can tell that authors should be printed in italic, that if there are more than 10 authors only the first three should be printed, etc.
Knowledge Bases (KBs)
Define one or more knowledge bases that enable you to transform various forms of input data values into the unique standard form on the output.
Example: You can tell that Phys Rev D and Physical Review D are both the same journal and that these names should be standardized to Phys Rev : D.
Execution Test
Enables you to test your formats on your sample data file. Useful when debugging newly created formats.

To learn more on BibFormat configuration, you can consult the BibFormat Admin Guide.

3. Running BibFormat

3.1. From the Web interface

Run Reformat Records tool. This tool permits you to update stored formats for bibliographic records.
It should normally be used after configuring BibFormat's Behaviours and Formats. When these are ready, you can choose to rebuild formats for selected collections or you can manually enter a search query and the web interface will accomplish all necessary formatting steps.
Example: You can request Photo collections to have their HTML brief formats rebuilt, or you can reformat all the records written by Ellis.

3.2. From the command-line interface

Consider having an XML MARC data file that is to be uploaded into the CDS Invenio. (For example, it might have been harvested from other sources and processed via BibConvert.) Having configured BibFormat and its default output type behaviour, you would then run this file through BibFormat as follows:

 $ bibformat < /tmp/sample.xml > /tmp/sample_with_fmt.xml
 
that would create default HTML formats and would "enrich" the input XML data file by this format. (You would then continue the upload procedure by calling successively BibUpload and BibWords.)

Now consider a different situation. You would like to add a new possible format, say "HTML portfolio" and "HTML captions" in order to nicely format multiple photographs in one page. Let us suppose that these two formats are called hp and hc and are already loaded in the collection_format table. (TODO: describe how this is done via WebAdmin.) You would then proceed as follows: firstly, you would prepare the corresponding output behaviours called HP and HC (TODO: note the uppercase!) that would not enrich the input file but that would produce an XML file with only 001 and FMT tags. (This is in order not to update the bibliographic information but the formats only.) You would also prepare corresponding formats at the same time. Secondly, you would launch the formatting as follows:

 $ bibformat otype=HP,HC < /tmp/sample.xml > /tmp/sample_fmts_only.xml
 
that should give you an XML file containing only 001 and FMT tags. Finally, you would upload the formats:
 $ bibupload < /tmp/sample_fmts_only.xml
 
and that's it. The new formats should now appear in WebSearch.

4. Detailed Configuration Manual

What follows is a transcription of an old FlexElink Configuration Manual v0.3 (2002-07-31). The text suffers from missing screen snapshots, and the terminology may not be fully up-to-date at places.

4.1. About BibFormat

BibFormat is a piece of software that is part of the CDS Invenio (http://cdsweb.cern.ch).

Its mission, in a few words, is to provide a flexible mechanism to format the bibliographic records that are shown as a result of CDS Search user queries, allowing the administrators or users to customize the view of them. Besides, it offers the possibility of using a linking system that can automatically generate all the links included in the displayed records (fulltext access, electronic journals reference, etc), considerably reducing maintenance.

To clarify this too formal definition, we'll try to illustrate the role of BibFormat inside the CDS Search module by showing the following figure. Please, note that this drawing is trying to show the main role that BibFormat plays in the CDS structure and it's quite simplified, but of course the underlying logic is a bit more complex.

[Fig. 0]

As you can see, when a user query is received, Weblib determines which records from the database match it; then it asks BibFormat to format the obtained records. BibFormat looks at its rule repository and for each record determines which format has to be taken, applies the format specification and solves the possible links; it then gives all this (in a formatted way) back to Weblib, which makes a nice HTML page including the formatted results given by BibFormat among other info.

The good point in all this is that anyone that has access to BibFormat rule repository is able to modify the final appearance of a query result in the CDS Search module without altering the logic of the search engine.

In order to be able to modify this BibFormat rule repository, a web configuration interface is provided. Throughout this paper, we'll try to explain (in a friendly way and from the user's point of view) how to access this interface, how it's structured and how to configure BibFormat through it to achieve desired results.

4.2. How it works?

We've outlined the role of BibFormat inside the CDS, so it's time now to have an overview of how it works and how it's organized. We'll try not to be very technical; however, some explanation of the BibFormat repository and architecture is needed to understand how it works.

BibFormat, basically, takes some bibliographic records as input and produces a formatted & linked version of them as output. By "formatted" we mean that BibFormat can produce an output containing a transformed version of the input data (normally an HTML view); the good part is that you can entirely specify the transformation to apply. At the same time, by "linked" we mean that you can ask BibFormat to include (if necessary) inside this formatted version references to some Internet resources that are related to the data from some pre-configured rules.

As an example, we could imagine that you'd want to see the resulting records from CDS Search queries to show their title in bold followed by their authors separated by commas. To achieve this you'll have to go to the BibFormat configuration interface and define a behavior for BibFormat in which you describe how to format incoming records:

 
   "<b>" $title "</b>"
   forall($author){
       $author separator(", ")
   }
 
 

Figure 1.- A very first Evaluation Language example

Don't be scared!! It's a first approach to the way BibFormat allows you to describe formats. As you can see, BibFormat uses a special language that you'll have to learn if you want to be able to specify formats or links; it seems difficult (as much as a programming language) but you'll see that it's much easier than it seems at first sight.

The next figure shows how BibFormat works internally. When BibFormat is called, it receives a set of bibliographic records to format. It separates each record and translates it into a set of what we call "internal variables"; these "internal variables" are simply an internal representation of the bibliographic record; the important thing with them is that they will be available when you have to describe the formats. Once it has these "internal vars", the processor module looks into the behavior repository for that one (let's say format) you've asked BibFormat to apply (when BibFormat is called, you can indicate which of the pre-configured behaviors to apply; this allows it to have more than one behavior); inside this behavior you can specify which data you want to appear, how it has to appear, some links if they exist... in other words, the format (actually, it's something more than a format, it describes how BibFormat has to behave for a given input; that's why we refer to it as behavior). As we've already said, you can include links in a behavior specification; links are a special BibFormat feature that helps you to reduce the maintenance of your formats: you can include a link in several formats or behaviors.

The picture below illustrates this explanation.

[Fig. 2]

Summarizing, BibFormat can transform an input made up of bibliographic records into an HTML output (not only HTML but any text-based output) according to certain pre-configured specifications (behaviors) that you can entirely define using a certain language.

Just to mention, BibFormat currently takes OAI MARC XML as the format for input records, but it can be adapted to other kinds of input (reading a database, function call, etc) with a little development.

4.3. A first look at the web configuration interface

BibFormat can be configured through its configuration interface that is accessible via web. It's made up of a bunch of web pages that present you the main configuration aspects of BibFormat allowing you to change them. In this section we are going to have a first look at this web interface, how it's structured and its correspondence with BibFormat features.

Before entering these web pages you'll be asked for your accessing username & password. Only certain users are allowed to access BibFormat WI; first you need a CDS account that you can create easily by using the standard CDS account manager; then you have to ask BibFormat administrator to give privileges to access the WI.

Once your password is accepted you'll access the configuration interface. You'll see that it is quite simple: it's structured in different sections; each of them corresponds to a BibFormat feature and you can navigate through them by using a navigation bar that is always present on the left.

[Fig. 3]

Here is a list of the different sections the interface offers you and their correspondence with BibFormat features:

Each section has different particularities but the way of dealing with them follows a common line through the interface. However, each section with their common things and particular characteristics are treated in the following chapters of this manual.

4.4. Mapping the input (OAI Extraction Rules)

We have already spoken a bit about BibFormat internal variables. These are a key point to understand the BibFormat way of working. As you know, BibFormat takes some bibliographic records as input and, according to some pre-configured behavior, formats them into HTML, for example. The problem is that these input records can come in several formats: different XML conventions, database records, etc. For now, at CDS we only consider that the input comes in OAI MARC XML but in the near future we may have to extend it to accept other input formats.

That's the reason why internal variables appear; they provide a common way to refer to input data without relying on any concrete format. In other words, we will define BibFormat links and behaviors referring to these internal variables and we'll have some rules that define how to map an input format to them, so we would be able to use any BibFormat defined behavior with any input that can be mapped to internal variables.

[Fig. 4]

You shouldn't worry about this because it is more on the development/administration side, but it's important to know where internal variables come from and what they refer to. Besides, for CDS we only consider the incoming data in OAI MARC XML format, so we'll talk only about this case.

Internal variables are quite a simple concept: a variable is just a label that represents some values from the input. Besides, a variable can have fields that are also labels that represent values from the input but that are related to each other under the variable (e.g. you can have a variable that maps authors and another that maps authors' home institutes independently; but if you want to represent an author and his home institute you need to relate these two variables in some way). Variables and their fields also support multiple values.

Focusing on OAI MARC XML, the concept of variable and field is already in the input structure:

So what we will have in BibFormat is a set of rules that tells which varfield element each variable name corresponds to, and which subfield element each variable field name maps to. Through the web interface you'll be able to add or delete fields of variables or variables themselves; you'll even be able to modify the mapping tags of variables (this way you can keep your formats independent of changes in the meaning of MARC tags).

In the web interface, all this is located in OAI Ext. Rules section as you can see in the following figure:

[Fig. 5]

Let's illustrate how BibFormat maps a certain input to variables and fields with an example:

We have this variable & field definition on BibFormat:

Var.
label
Mapping tag Mult. V. Fields
100 <varfield id="100" i1="" i2=""> Yes
Field label Mapping tag
a <subfield label="a">
e <subfield label="e">
909C0 <varfield id="909" i1="C" i2="0"> No
Field label Mapping tag
b <subfield label="b">

And then a record like the following arrives as input:

   <oai_marc>
    <varfield id="037" i1="" i2="">
     <subfield label="a">SCAN-0009119</subfield>
    </varfield>
    <varfield id="100" i1="" i2="">
     <subfield label="a">Racah, Giulio</subfield>
    </varfield>
    <varfield id="100" i1="" i2="">
     <subfield label="a">Guignard, G</subfield>
     <subfield label="e">editor</subfield>
    </varfield>
    <varfield id="909" i1="C" i2="0">
     <subfield label="b">11</subfield>
    </varfield>
    <varfield id="909" i1="C" i2="0">
     <subfield label="b">12</subfield>
    </varfield>
   </oai_marc>
 

The result of the mapping would be like this:

Variable "100"
Value# 0   Field "a" value Racah, Giulio
Value# 1   Field "a" value Guignard, G
Field "e" value editor

Variable "909C0"
Value# 0   Field "b" value 12

Notice how varfield 037 is not considered because there isn't an entry in the BibFormat configuration. Also notice how the values are created: if "allow multiple values" is set to "Yes" each occurrence of a varfield element determines a new value (variable "100"); in other case, the last value is taken as single value for the variable (variable "909C0").

4.5. Defining output types: Behaviors

Now that we already know how internal variables are structured and what they represent in the input, it's time to have a look at how to configure BibFormat to transform that input data mapped into variables into HTML results (although any text-based output could be generated).

When BibFormat is asked to format a bunch of bibliographic records, it is also necessary to specify which output type it has to use. This output type is a string that identifies a pre-configured set of conditions and actions that tells BibFormat how to behave with the given input data (that's why the terms output type and behavior are used interchangeably throughout this document).

BibFormat can have several pre-configured behaviors each one identified by a different label. There are two different types of behaviors (you can choose the behavior type when you define it):

  1. Normal: Consists in a behavior that outputs exactly the result of its evaluation.
  2. Input Enrich (only for XML inputs): It echoes each XML record from the input, inserting the behavior result just before the XML closing element of the record.

Each behavior contains an ordered list of conditions; a condition can contain zero or more associated actions (actions are ordered inside a condition). A condition is a behavior item described by an Evaluation Language expression that gives as result "TRUE" or "FALSE". An action is an Evaluation Language (EL) statement that produces any output.

When BibFormat is called to format a set of input records with a given behavior label, it looks for the behavior conditions. It evaluates their EL in order and when one of them produces "TRUE" as result, it looks for their associated actions. Then BibFormat evaluates the actions in the specified order and concatenates their result.

By using different conditions you can specify alternative formats inside a behavior (imagine that you want to format a record differently depending on its base number); it's true that you could also reach this solution by using EL IF statements, but it's more clear, efficient and re-usable (you can change one condition without touching the rest or you can give it more priority than others, that means give it the chance to be evaluated before others, by changing its apply order).

Actions are used for specifying the format itself or the actions you want to carry on with in case the condition is accomplished.

Through the web interface you can define new output types or modify the ones that already exist. The use is quite easy: you just have to select the link in the desired item with the operation you want to do over it.

[Fig. 6]

Let's have a look at a simple example to illustrate how to define a behavior that fits our needs:

Imagine a typical case where you want to format bibliographic records but depending on their base number you want to apply different formats. Whenever a record from base 27 (standards) arrives we want only to show its title and the standard numbers, in other case a default format will be applied in which the title and authors are shown. We'll assume CDS variable notation and that the input rules are defined properly.

We are going to define a new NORMAL behavior for this new situation, let's call it SIMPLE. In it we'll need two conditions to be defined: one for applying the default format and another one for the 27-base special one. The base number comes in variable 909C0.b, so the conditions would be based on this variable content.

The result behavior should be defined like this:

SIMPLE(NORMAL)
10 $909C0.b="27"
 
 "<b>"$245.a"</b>"
 forall($0248.a){
  rep_prefix(" - ") $0248.a separator("; ")
 }
 
 
50 ""=""
 
 "<b>"$245.a"</b>"
 forall($100.a){
  rep_prefix(" - Authors:") $100.a separator("; ")
 }
 
 

Some explanations on this example are needed:

4.6. Formats

Formats are a special construction that BibFormat Evaluation Language (EL) offers. It allows you to group under an identifier some EL code and after you can call it from every EL statement.

You can manage these formats using the web interface. It is quite easy to do so: When you access the Formats section it will present you a list with all the format identifiers that are already defined and a small documentation about what's the format for. From there you can see the whole EL code by using the link [Code]. You can add a new format by using the set of input boxes that you'll find at the end of the page. Also delete and modify operations are possible for already defined formats.

[Fig. 7]

Note: When defining formats, one has to pay attention not to use "recursive" format calls (either direct or indirect); this can lead to execution problems. For example, imagine that we have a format called "ex_1" that has a call to itself:

Format "ex_1"
 "hello world" 
 format("ex_1")
 

This is a "direct" recursive call; you should never have this kind of call, as the web interface should warn you if it finds this kind of trouble. However, "indirect" recursive calls are not detected by the web interface, so you have to take care of them yourself. One example of "indirect" recursion:

Format "ex_1"
 "hello world" 
 format("ex_2")
 

4.7. Knowledge bases (KBs)

This is yet another special feature provided by BibFormat Evaluation Language. In a few words, this allows you to map one string value to another according to a pre-stored set of key values that map to other values (the knowledge bases). All the knowledge bases are identified by a label that has to be unique (among other KBs identifiers); remember that identifiers are not case-sensitive.

These sets of values normally lived in a file, but with this new development there was a need for easy KB management integrated in BibFormat. For this reason, you can manage KBs from the BibFormat configuration interface: section KBs.

When accessing to KBs section, the list of all the KBs identifiers defined will be displayed. Below it you'll find a set of controls to add new KBs; the use of these controls is as usual along the interface but there's something a bit special: Normally, you shouldn't fill in the input box that asks you for the Knowledge base table name; all the knowledge base data is handled by a database in which each KB corresponds to a DB table; this input box gets the internal table name for that KB; normally the KB manager will generate it for you so you shouldn't need to use it.

[Fig. 8]

Each KB has a link for accessing the list of values that it contains. If you click on it, a new window will show you the list of current values (key and mapped ones) and a very easy interface to add new values or to delete existing ones (KB values are case sensitive).

[Fig. 9]

4.8. User Defined Functions (UDFs)

The use of User Defined Functions (UDFs) is one of the more powerful features of BibFormat Evaluation Language (EL). The idea is that inside EL you can use operations or functions over strings; normally a large number of different string transformations are needed when talking about formatting, but we cannot pretend to implement all these operations inside EL because their number is constantly growing and new needs appear all the time. To deal with this problem, BibFormat defines a mechanism that allows you to define as many functions (UDFs) as you want and use them inside any EL statement.

These functions are identified by a unique name and they receive data (over which they do operations) by parameters. These functions are defined in a programming language (PHP) and therefore good knowledge of this language is needed.

BibFormat offers a complete UDF management through the UDFs web interface section. There you'll see a complete list of all defined UDFs with their identifier, parameters and a small documentation about what the UDF does. You can also add, delete or modify UDFs or even have a look at the PHP code of an already defined function (there you'll be able to launch small tests over the defined functions).

[Fig. 10]

The definition of these functions should be reserved to administrators and some particularities have to be taken into account when defining UDFs:

4.9. Defining links

As we've already said, BibFormat is not only a formatter but it also provides a link manager but, what do we mean by 'link manager'? The idea is to have a set of rules that describe how to generate a link using certain data; if the link can be generated from those rules, then the link manager can check different things (i.e. see if the link is valid, if it's a link to a file it can check if the file exists and in which formats it exists, etc) and finally return the solved link. In other words, if you have a set of bibliographic records that can contain a certain link and that link can be coded in the link manager rules, you don't need to store each link in each bibliographic record, you just use the link manager to generate them dynamically; like this, you only have to maintain a small set of rules and not thousands of static links in records.

BibFormat allows you to configure different link definitions each of them identified by a unique name; each of these link definitions have some associated parameters which are the information passed to the rules defined for it. Then, when you call the link manager to solve a link (from an EL statement, for example) you'll have to specify the identifier of the link definition you want to be used and the value for each of the parameters used by that link definition (always string values). The link manager will retrieve the rules associated to the link definition specified and will interpret those rules using the given parameter values, informing you if the link was generated correctly and result (the solved link).

BibFormat provides this mechanism and through the web interface you can access the rule repository to have a look at the available link definitions, define new link rules or maintain already defined ones. When adding or modifying a link definition you'll have to specify the parameters; please remember to separate them by using commas.

[Fig. 10]

Link definitions are structurally quite similar to behaviors: Although there can be different types of them (as we'll see later), a link definition is made up of one or more conditions and each of these conditions can have one or more actions that tell how the link has to be built in case its condition is accomplished. In general, link rules (this includes conditions and actions) have a particular structure and they are described in Evaluation Language (EL) with one restriction: the EL LINK statement cannot be used. Each group of conditions-actions of a link definition can be of a different solving type (actually, when you create a new link definition, its solving type is asked; this is just because all conditions that will be created for that link definition will have the selected solving type as default; but you can change it afterwards having a "mixed" link definition). Their structure and the way the link manager interprets them will depend on their solving type. Currently, you can define link conditions of two different solving types: EXTERNAL or INTERNAL. A more detailed explanation about each type is given later.

As we've said a link definition is made up of various link conditions. When a solving for a concrete link definition is asked, the link manager retrieves all link conditions associated to it. Then it takes the first of them (following the evaluation order - the lower is the evaluation order number, the first the condition is considered), it evaluates its EL code with the parameter values passed and if the result is "TRUE" associated actions are executed, the link is returned and the solving process finishes. In case a condition fails, it looks for the next one. If all the conditions fail then the link manager returns that the link couldn't be solved. This is the general behavior of the link manager, but the way of determining if a link has been solved or not and the link building depends on the condition solving type.

4.9.1. EXTERNAL link conditions

This is the simplest way of solving links. It's intended to be used when you want to generate a link that points to an external resource (normally a web page). In this case the link condition is composed by only one action that will be evaluated if the associated condition is "TRUE". When a condition of this type is evaluated "TRUE" and the action is executed, the result of the action is given as the solved link and the link manager finishes.

[Fig. 11]

4.9.2. INTERNAL link conditions

This condition solving type is intended to be used when you want to link to a document which is a file (inside or outside your file system) and that can be in different file formats.

This case is a bit more complex than the previous one, so we'll go step-by-step explaining differences and special features:

Field name Value that contains
url The same value as the LINK variable: The solved URL.
file Contains the local full path to the file the solved URL points to.
format_id Contains the file format id string
format_desc Contains the file format description string (this is defined for each file format)

4.9.2 Example

As the link generation is quite a complex topic (specially when talking about INTERNAL linking) we'll try to illustrate it with a simple example.

Let's imagine we want to create a new link definition for generating full-text access to the documents that are archived on a document server (a file system which contains documents' electronic versions). These documents are organized systematically depending on three characteristics that are included in the bibliographic records: BASE, CATEGORY and ID. When the base corresponds to "CERNREP" then the files are archived below directory /pub/www/home/cernrep/ and can be stored following two different criteria that depend on the CATEGORY and ID values; the documents are all HTML. However, if the base is "PREPRINT" and the CATEGORY is either "HEP-TH" or "HEP-PH" they are stored under directory /archive/electronic|/pub/www/home/ following a certain criterion; in this case the documents can be in several file formats: PDF, Postscript, MS Word.

Of course, we want only the link to be created if the files corresponding to the bibliographic records exist.

So we start by creating a new link definition that we'll call FULLTEXT. It will receive three parameters that are the information we need for generating this kind of link: BASE, CATEGORY and ID. We select INTERNAL as the default solving type and then we fill in the base file path and URL with some default values (these values are not important, they will be copied by default to the conditions we are going to create afterwards).

[Fig. 12]

Then we create a condition for the first possibility: when BASE is "CERNREP". We select INTERNAL as link condition because we want to link to a file and we want to check its existence and we fill in the base file path and URL with the corresponding values. Then we assign the file format types and we enter the file archiving criteria as different actions.

[Fig. 13]

For the other possibility we proceed in the same way by adapting the definition to the requirements; we'll have something like this as result:

[Fig. 14]

Once we have finished the link definition, we can insert links of this type from a BibFormat behavior, for example. Let's imagine we have included a piece of EL code like this in a behavior because we want to insert a link to the full-text documents of any record:

 
 link("FULLTEXT",$base, $category, $id)
 {
   "Fulltext: "
   forall($link){
     "<a href=\"" $link.url "\">" $link.format_desc "</a>"
     separator(" - ")
   }
 }
 
 

This EL statement will include the string "Fulltext: " followed by a link to all the documents found for the values of internal variables $base, $category, $id separated by " - ".

4.10. User management

The BibFormat web interface (WI) comes with a security mechanism which allows you to define which users can access the WI. BibFormat doesn't have user management incorporated; instead it uses the CDS user schema (as it is a part of CDS). So if you are not registered as a CDS user and you want to have access to the BibFormat WI, the first thing to do is to register in CDS through the standard procedure (for example via the CDS Search interface you can access the CDS account management system).

BibFormat WI access policy is rather simple: it keeps a list of CDS users that can access the WI. Then if someone tries to access any part of the WI, the system will ask the user to identify himself as a CDS user. If the CDS login is successful and the user is in BibFormat's access list, then the user will gain access to the WI.

There's a section in the WI which allows you to define which CDS users will have access to the WI. The use is rather simple: You can add CDS users to the access list by specifying either their CDS user id or their CDS login; then you can delete a CDS user from the access list by simply selecting the link "delete" for the corresponding user.

[Fig. 15]

When you install BibFormat for the first time and you access the WI you'll see that no login or password is asked. The security mechanism doesn't get activated until at least one user is added to BibFormat's access list. So if you don't want to limit the access to the BibFormat WI, keep the access list without any user in it.

4.11. Evaluation Language Reference

In this section we'll present a more or less formal definition of the Evaluation Language (EL); although we are using some formal methods to describe it we'll also make a quick explanation about the elements that make up the language and how to combine them to arrive at the desired results.

Just below you can find the EL definition, expressed in terms of EBNF (Extended Backus-Naur Form) notation. We have used capital letters to express non-terminal elements and non-capital/bold characters for the terminal ones. There's one remark to make: Whenever you find the mark [REX] after any definition, it means that we have used a regular expression just before in order to express a set of non-terminals.

 
  SENTENCE ::= TERM {&& TERM | || TERM}
 
  TERM ::= FACTOR {= FACTOR | != FACTOR | FACTOR}
 
  FACTOR ::= [!] BASIC
 
  BASIC ::= VARIABLE | LITERAL | FUNCTION | ( SENTENCE ) | FORALL |
            IF | FORMAT | LINK | COUNT | KB
 
  VARIABLE ::= $ STRING [. STRING]
 
  LITERAL ::= "([^"] | \")*" [REX]
 
  FUNCTION ::= STRING ( [ SENTENCE {, SENTENCE} ] )
 
  FORALL ::= forall ( VARIABLE [, LITERAL] ) { SENTENCE }
 
  IF ::= if( SENTENCE ) { SENTENCE } [else { SENTENCE }]
 
  FORMAT ::= format( SENTENCE )
 
  LINK ::= link( SENTENCE , [SENTENCE {, SENTENCE}] ) { SENTENCE }
           [else { SENTENCE }]
 
  COUNT ::= count( VARIABLE )
 
  KB ::= kb( SENTENCE )
 
  STRING ::= [a-zA-Z0-9_]+ [REX]
 
 

This is just a formal way of describing the language, but don't worry if you don't understand it very well because just below these lines we'll try to describe it in a more informal way.

To begin with, you should know that EL is a language designed to work with strings (a string is a collection of characters) but it has also some logic and comparison operations. One important thing you have to be aware of is that in EL blank spaces, tabulators or carriage returns have no more meaning than separator for elements of the language; that means that between two basic elements you can have as many spaces or carriage returns as you want.

One of the basic elements of the language is what we call LITERALS. These things represent constant string values; they are delimited by a pair of double quote (") symbols surrounding the string you want to express. Everything you put inside the double quotes will be considered as it is, so inside a literal several spaces or carriage returns have meaning (it's the only case). If you want to express a double quote symbol inside a literal you have to escape it using \.

Some examples of literals:

Another important basic element of the language is VARIABLES. These elements represent string data from the input to which you can refer inside of the language (and is considered also as a string). Variables are defined in advance by the administrator (or even users) so you have to know which of them you have access to. Additionally, variables can contain FIELDS that are simply other input values that are grouped under a variable because they have some kind of relationship between them (for example, you could have a variable for the information about the author and fields like name, born place, etc for it). If you want to know more about variables and their correspondence with the input you can look at the Mapping the Input section. The way of expressing a variable in EL is by a dollar symbol followed by any letter, number or underscore; variables are case-insensitive. To refer to any field of a variable, you simply put a dot followed by the field name (which is also made up of any character, number or underscore).

Some examples about variables and fields:

Now that we know basic elements of the language we can start thinking about how to combine them. The most important (and unique) string operation is concatenation: adding strings. This operation is implicit to the language, so we just put language elements one before another, and the representation result will be the result of the basic elements one after another.

Some samples:

These two, literals and variables, are only basic elements of the EL. You can combine them using concatenation to get new strings. But, of course, there are some more operations you can apply over strings: UDFs (User Defined Functions). We'll also name these elements as functions, because they are that: functions or operations to be applied over strings; when talking about strings we include basic elements or resulting string from applying any operations. A UDF has a name that identifies it uniquely and needs to get some information that we call parameters. A UDF gives another string as result depending on the parameter values (always strings). So to represent a function in EL you need its name followed by an open parenthesis, the parameter values separated by commas and a closing parenthesis. There's a list of UDFs you can look at through the interface but this list can be extended to fit your needs (look at UDFs section of this manual).

Some examples:

As you can see, these UDFs are very powerful because you can concatenate their result with another element (literal, variable or even function) and the parameters can be basic elements or expressions. We can extend this ensuring that any element or expression of the EL that gives as result a string value can be combined with other EL expressions or elements.

Another very useful feature of EL is the possibility to use KNOWLEDGE BASES (KBs). A KB is just a set of key values that map (one-to-one) to another set of values; maybe "knowledge bases" isn't a very appropriate name because they are more like translation tables. BibFormat offers tools to create and maintain KBs that can be used in the EL afterwards (see chapter KBs management in this manual). You can see KB invocation as a special function (the syntax for calling it is the same) with name kb and that takes two parameters: one for indicating the KB name (BibFormat can handle several KBs) and another one for the key value to translate. The result is the mapped KB value or an empty string if it doesn't exist as a key value in the specified KB. A typical example is when you have months with numbers and you want to translate them into month names; you could have a KB that maps all the month numbers to month names and then call it like this kb("MONTH", $m).

Now let's move to FORMATS. Formats are some EL code which is grouped under a label (a name) and that can be used in any other EL statement. BibFormat allows the user to define as many formats as he wants and identify each of them with a simple name. In few words, formats allow you to reuse EL code; within a format you can put any EL code (even other format calls) and all the variable values are completely available.  Again, a format call in EL follows the same convention as functions: the word format followed by the format name (a string) between parentheses. Calling a format is as if the EL code defined inside that format were pasted, as it is, in the place where you make the call.

Example: Imagine you have to write the title of a bibliographic record with a certain format, let's say in bold and red; but you are going to use this formatted title in several places. So you can take advantage of EL formats and define a format called TITLE that contains the code "<font color=\"red\"><b>" $245.b "</b></font>". Once this is done, you could use it to format records by printing their title in that way and their author after it: format("TITLE") "/" $100.a. The good thing is that if some day you decide to change the title formatting you'd only need to modify the TITLE format definition and not all the places where you show the title.

At this point, you have seen basic elements and operations with EL. You may think that it is powerful enough to express your formatting work, but there are more complex situations that you'll have to face. We have tried to design the EL to be easy enough, but with the following advanced structures it can sometimes become a bit complex.

All these basic elements and operations are quite OK. But there are times when you want to compare expressions and decide what to do depending on the result of the comparison. For this purpose, EL has an IF statement and a few comparison and logic operators built in (don't forget that any functionality needed can be achieved by defining new UDFs; EL gives basic operations to provide this possibility). Let's go step by step: First let's talk about the set of operators that can be used in a comparison:

  1. Comparison operators: Equal and non-equal (=, !=). They take two operands that have to be strings and produce a logic (true or false) value.
  2. Logical operators: AND, OR and NOT (&&, ||, !). All of them have to be used over logical values; AND and OR take two operands, and NOT takes one operand.

All of them are right associative (except NOT which is unary left-associative) and their precedence goes like this (more to less): NOT, (EQUAL, NON-EQUAL), (AND, OR). These operators cannot be used anywhere, only inside statements that expect a logic value as result, in other words, inside condition statements.

The IF structure is quite easy to learn: First we indicate the word IF followed by a condition statement surrounded by parentheses; then an EL statement in braces can be specified, this statement will be executed only if the condition was true; optionally, we can add an ELSE word followed by another EL statement in braces, that will only be triggered if the IF condition was not true.

Let's have a look at some examples:

BibFormat is not only an EL processor. Among others, it contains a link solver that contains its own rule repository in order to be able to automatically solve links (see chapter Link solver of this manual). EL has one special structure for asking the link solver for some links and including them in the formatted version of the bibliographic record. This way links are easy to maintain (you modify the rules independently from where the link is being used) and as re-usable as formats or UDFs. Links are identified by a label and need some information to be passed as parameters; then an EL statement has to be specified which will be effective only if the link is solved and inside which you'll have access to a special variable, named LINK, which contains the solved link among other information (see chapter Link solver for more information about which values are accessible); additionally, an else statement can be added (following the same syntax as in the IF construction) that will be effective only if the link can't be solved by the Link solver.

Example:

The next step when talking about EL components is to deal with multiple values. Life is not so easy and, of course, a bibliographic record can have more than one author or can have a related document which is in more than one format and that has to be linked. In other words, BibFormat supports having variables and fields with multiple values (see chapter Mapping input), consequently a way of applying an EL statement over all the values of a variable or a field would be quite useful. FORALL is our construction!! It allows you to specify a variable or a field followed by an EL statement (between braces) that will be applied for every value of the variable or the field; any reference to the iteration variable inside the FORALL EL statement will be related to the current iteration variable value (if you refer to a variable that has multiple values outside a FORALL the first value is considered). One limitation is that you shouldn't nest FORALL statements, in other words, never put a FORALL inside another one. This construction also lets you limit the number of times you want to iterate over a variable or field by adding a literal with the number of iterations.

Some examples:

Finally, there's still one EL special function: COUNT. Due to certain special situations or strange input data in the variables, it is sometimes useful to know how many values a variable or a field contains. So this function simply takes a variable or field as argument and returns a string with the number of values that it contains; if the value returned is 0, that means that no value is in the variable, which means that the variable doesn't exist or there weren't any values mapped from the input.

Examples:

diff --git a/modules/bibformat/lib/bibformat_config.py b/modules/bibformat/lib/bibformat_config.py index a42786558..9ac9c977c 100644 --- a/modules/bibformat/lib/bibformat_config.py +++ b/modules/bibformat/lib/bibformat_config.py @@ -1,95 +1,96 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. # pylint: disable-msg=C0301 """BibFormat configuration parameters.""" __revision__ = "$Id$" import os from invenio.config import etcdir, pylibdir #True if old php format written in EL must be used by Invenio. #False if new python format must be used. If set to 'False' but #new format cannot be found, old format will be used. 
CFG_BIBFORMAT_USE_OLD_BIBFORMAT = False #Paths to main formats directories CFG_BIBFORMAT_TEMPLATES_PATH = "%s%sbibformat%sformat_templates" % (etcdir, os.sep, os.sep) CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = "invenio.bibformat_elements" CFG_BIBFORMAT_ELEMENTS_PATH = "%s%sinvenio%sbibformat_elements" % (pylibdir, os.sep, os.sep) CFG_BIBFORMAT_OUTPUTS_PATH = "%s%sbibformat%soutput_formats" % (etcdir, os.sep, os.sep) #File extensions of formats CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION = "bft" CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION = "bfo" CFG_BIBFORMAT_ERROR_MESSAGES = \ { 'ERR_BIBFORMAT_INVALID_TAG' : '%s is an invalid tag.', 'ERR_BIBFORMAT_NO_TEMPLATE_FOUND' : 'No template could be found for output format %s.', 'ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME' : 'Could not find format element corresponding to %s.', 'ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME' : 'Could not find output format corresponding to %s.', 'ERR_BIBFORMAT_CANNOT_RESOLVE_TEMPLATE_FILE' : 'Could not find format template corresponding to %s.', 'ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND' : 'Format element %s could not be found.', 'ERR_BIBFORMAT_BAD_BFO_RECORD' : 'Could not initialize new BibFormatObject with record id %s.', 'ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED' : 'Could not find a fresh name for output format %s.', 'ERR_BIBFORMAT_KB_ID_UNKNOWN' : 'Knowledge base with id %s is unknown.', 'ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN' : 'Output format with code %s could not be found.', 'ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE' : 'Format template %s cannot not be read. %s', 'ERR_BIBFORMAT_CANNOT_WRITE_TEMPLATE_FILE' : 'BibFormat could not write to format template %s. %s', 'ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE' : 'Output format %s cannot not be read. %s', 'ERR_BIBFORMAT_CANNOT_WRITE_OUTPUT_FILE' : 'BibFormat could not write to output format %s. %s', - 'ERR_BIBFORMAT_EVALUATING_ELEMENT' : 'Error when evaluating format element %s with parameters. 
%s', + 'ERR_BIBFORMAT_EVALUATING_ELEMENT' : 'Error when evaluating format element %s with parameters %s', 'ERR_BIBFORMAT_CANNOT_READ_ELEMENT_FILE' : 'Output format %s cannot not be read. %s', 'ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD' : 'Should be "tag field_number:" at line %s.', 'ERR_BIBFORMAT_INVALID_OUTPUT_RULE_FIELD_TAG' : 'Invalid tag "%s" at line %s.', 'ERR_BIBFORMAT_OUTPUT_CONDITION_OUTSIDE_FIELD': 'Condition "%s" is outside a tag specification at line %s.', 'ERR_BIBFORMAT_INVALID_OUTPUT_CONDITION' : 'Condition "%s" can only have a single separator --- at line %s.', 'ERR_BIBFORMAT_WRONG_OUTPUT_RULE_TEMPLATE_REF': 'Template "%s" does not exist at line %s.', 'ERR_BIBFORMAT_WRONG_OUTPUT_LINE' : 'Line %s could not be understood at line %s.', 'ERR_BIBFORMAT_OUTPUT_WRONG_TAG_CASE' : '"tag" must be lowercase in "%s" at line %s.', 'ERR_BIBFORMAT_OUTPUT_RULE_FIELD_COL' : 'Tag specification "%s" must end with column ":" at line %s.', 'ERR_BIBFORMAT_OUTPUT_TAG_MISSING' : 'Tag specification "%s" must start with "tag" at line %s.', 'ERR_BIBFORMAT_OUTPUT_WRONG_DEFAULT_CASE' : '"default" keyword must be lowercase in "%s" at line %s', 'ERR_BIBFORMAT_OUTPUT_RULE_DEFAULT_COL' : 'Missing column ":" after "default" in "%s" at line %s.', 'ERR_BIBFORMAT_OUTPUT_DEFAULT_MISSING' : 'Default template specification "%s" must start with "default :" at line %s.', 'ERR_BIBFORMAT_VALIDATE_NO_FORMAT' : 'No format specified for validation. Please specify one.', 'ERR_BIBFORMAT_TEMPLATE_HAS_NO_NAME' : 'Could not find a name specified in tag "" inside format template %s.', 'ERR_BIBFORMAT_TEMPLATE_HAS_NO_DESCRIPTION' : 'Could not find a description specified in tag "" inside format template %s.', 'ERR_BIBFORMAT_TEMPLATE_CALLS_UNREADABLE_ELEM': 'Format template %s calls unreadable element "%s". Check element file permissions.', 'ERR_BIBFORMAT_TEMPLATE_CALLS_UNLOADABLE_ELEM': 'Cannot load element "%s" in template %s. 
Check element code.', 'ERR_BIBFORMAT_TEMPLATE_CALLS_UNDEFINED_ELEM' : 'Format template %s calls undefined element "%s".', 'ERR_BIBFORMAT_TEMPLATE_WRONG_ELEM_ARG' : 'Format element %s uses unknown parameter "%s" in format template %s.', 'ERR_BIBFORMAT_IN_FORMAT_ELEMENT' : 'Error in format element %s. %s', 'ERR_BIBFORMAT_NO_RECORD_FOUND_FOR_PATTERN' : 'No Record Found for %s.', - 'ERR_BIBFORMAT_NBMAX_NOT_INT' : '"nbMax" parameter for %s must be an "int".' + 'ERR_BIBFORMAT_NBMAX_NOT_INT' : '"nbMax" parameter for %s must be an "int".', + 'ERR_BIBFORMAT_EVALUATING_ELEMENT_ESCAPE' : 'Escape mode for format element %s could not be retrieved. Using default mode instead.' } CFG_BIBFORMAT_WARNING_MESSAGES = \ { 'WRN_BIBFORMAT_OUTPUT_FORMAT_NAME_TOO_LONG' : 'Name %s is too long for output format %s in language %s. Truncated to first 256 characters.', 'WRN_BIBFORMAT_KB_NAME_UNKNOWN' : 'Cannot find knowledge base named %s.', 'WRN_BIBFORMAT_KB_MAPPING_UNKNOWN' : 'Cannot find a mapping with key %s in knowledge base %s.', 'WRN_BIBFORMAT_CANNOT_WRITE_IN_ETC_BIBFORMAT' : 'Cannot write in etc/bibformat dir of your Invenio installation. Check directory permission.', 'WRN_BIBFORMAT_CANNOT_WRITE_MIGRATION_STATUS' : 'Cannot write file migration_status.txt in etc/bibformat dir of your Invenio installation. Check file permission.', - 'WRN_BIBFORMAT_CANNOT_EXECUTE_REQUEST' : 'You request could not be executed.' + 'WRN_BIBFORMAT_CANNOT_EXECUTE_REQUEST' : 'Your request could not be executed.' } diff --git a/modules/bibformat/lib/bibformat_engine.py b/modules/bibformat/lib/bibformat_engine.py index adbf9cbf1..5f92c6709 100644 --- a/modules/bibformat/lib/bibformat_engine.py +++ b/modules/bibformat/lib/bibformat_engine.py @@ -1,1673 +1,1706 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. 
## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Formats a single XML Marc record using specified format. There is no API for the engine. Instead use bibformat.py. SEE: bibformat.py, bibformat_utils.py """ __revision__ = "$Id$" import re import sys import os import inspect import traceback import zlib import cgi from invenio.config import \ CFG_PATH_PHP, \ bindir, \ cdslang from invenio.errorlib import \ register_errors, \ get_msgs_for_code_list from invenio.bibrecord import \ create_record, \ record_get_field_instances, \ record_get_field_value, \ record_get_field_values from invenio.dbquery import run_sql from invenio.messages import \ language_list_long, \ wash_language from invenio import bibformat_dblayer from invenio.bibformat_config import \ CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, \ CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, \ CFG_BIBFORMAT_TEMPLATES_PATH, \ CFG_BIBFORMAT_ELEMENTS_PATH, \ CFG_BIBFORMAT_OUTPUTS_PATH, \ CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH from bibformat_utils import \ record_get_xml, \ parse_tag from xml.dom import minidom #Remove when call_old_bibformat is removed # Cache for data we have already read and parsed format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} kb_mappings_cache = {} cdslangs = language_list_long() # Regular expression for finding ... 
tag in format templates pattern_lang = re.compile(r''' #closing start tag (?P.*?) #anything but the next group (greedy) () #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Builds regular expression for finding each known language in tags ln_pattern_text = r"<(" for lang in cdslangs: ln_pattern_text += lang[0] +r"|" ln_pattern_text = ln_pattern_text.rstrip(r"|") ln_pattern_text += r")>(.*?)" ln_pattern = re.compile(ln_pattern_text) # Regular expression for finding tag in format templates pattern_format_template_name = re.compile(r''' #closing start tag (?P.*?) #name value. any char that is not end tag ()(\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tag in format templates pattern_format_template_desc = re.compile(r''' #closing start tag (?P.*?) #description value. any char that is not end tag (\n)? #end tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding tags in format templates pattern_tag = re.compile(r''' [^/\s]+) #any char but a space or slash \s* #any number of spaces (?P(\s* #params here (?P([^=\s])*)\s* #param name: any chars that is not a white space or equality. Followed by space(s) =\s* #equality: = followed by any number of spaces (?P[\'"]) #one of the separators (?P.*?) #param value: any chars that is not a separator like previous one (?P=sep) #same separator as starting one )*) #many params \s* #any number of spaces (/)?> #end of the tag ''', re.IGNORECASE | re.DOTALL | re.VERBOSE) # Regular expression for finding params inside tags in format templates pattern_function_params = re.compile(''' (?P([^=\s])*)\s* # Param name: any chars that is not a white space or equality. Followed by space(s) =\s* # Equality: = followed by any number of spaces (?P[\'"]) # One of the separators (?P.*?) 
# Param value: any chars that is not a separator like previous one (?P=sep) # Same separator as starting one ''', re.VERBOSE | re.DOTALL ) # Regular expression for finding format elements "params" attributes # (defined by @param) pattern_format_element_params = re.compile(''' @param\s* # Begins with @param keyword followed by space(s) (?P[^\s=]*)\s* # A single keyword, and then space(s) #(=\s*(?P[\'"]) # Equality, space(s) and then one of the separators #(?P.*?) # Default value: any chars that is not a separator like previous one #(?P=sep) # Same separator as starting one #)?\s* # Default value for param is optional. Followed by space(s) (?P.*) # Any text that is not end of line (thanks to MULTILINE parameter) ''', re.VERBOSE | re.MULTILINE) # Regular expression for finding format elements "see also" attribute # (defined by @see) pattern_format_element_seealso = re.compile('''@see\s*(?P.*)''', re.VERBOSE | re.MULTILINE) #Regular expression for finding 2 expressions in quotes, separated by #comma (as in template("1st","2nd") ) #Used when parsing output formats ## pattern_parse_tuple_in_quotes = re.compile(''' ## (?P[\'"]) ## (?P.*) ## (?P=sep1) ## \s*,\s* ## (?P[\'"]) ## (?P.*) ## (?P=sep2) ## ''', re.VERBOSE | re.MULTILINE) def call_old_bibformat(recID, format="HD", on_the_fly=False): """ FIXME: REMOVE FUNCTION WHEN MIGRATION IS DONE Calls BibFormat for the record RECID in the desired output format FORMAT. @param on_the_fly if False, try to return an already preformatted version of the record in the database Note: this functions always try to return HTML, so when bibformat returns XML with embedded HTML format inside the tag FMT $g, as is suitable for prestoring output formats, we perform un-XML-izing here in order to return HTML body only. 
""" res = [] if not on_the_fly: # look for formatted notice existence: query = "SELECT value FROM bibfmt WHERE "\ "id_bibrec='%s' AND format='%s'" % (recID, format) res = run_sql(query, None, 1) if res: # record 'recID' is formatted in 'format', so print it decompress = zlib.decompress return "%s" % decompress(res[0][0]) else: # record 'recID' is not formatted in 'format', # so try to call BibFormat on the fly or use default format: out = "" pipe_input, pipe_output, pipe_error = os.popen3(["%s/bibformat" % bindir, "otype=%s" % format], 'rw') #pipe_input.write(print_record(recID, "xm")) pipe_input.write(record_get_xml(recID, "xm")) pipe_input.close() bibformat_output = pipe_output.read() pipe_output.close() pipe_error.close() if bibformat_output.startswith(""): dom = minidom.parseString(bibformat_output) for e in dom.getElementsByTagName('subfield'): if e.getAttribute('code') == 'g': for t in e.childNodes: out += t.data.encode('utf-8') else: out = bibformat_output return out def format_record(recID, of, ln=cdslang, verbose=0, search_pattern=[], xml_record=None, uid=None): """ Formats a record given output format. Main entry function of bibformat engine. Returns a formatted version of the record in the specified language, search pattern, and with the specified output format. The function will define which format template must be applied. You can either specify an record ID to format, or give its xml representation. if 'xml_record' is not None, then use it instead of recID. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. 
@param recID the ID of record to format @param of an output format code (or short identifier for the output format) @param ln the language to use to format the record @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, stop if error in format elements 9: errors and warnings, stop if error (debug mode )) @param search_pattern list of strings representing the user request in web interface @param xml_record an xml string representing the record to format @param uid the user id of the person who will view the formatted page @return formatted record """ errors_ = [] # Temporary workflow (during migration of formats): # Call new BibFormat # But if format not found for new BibFormat, then call old BibFormat #Create a BibFormat Object to pass that contain record and context bfo = BibFormatObject(recID, ln, search_pattern, xml_record, uid) #Find out which format template to use based on record and output format. template = decide_format_template(bfo, of) ############### FIXME: REMOVE WHEN MIGRATION IS DONE ############### path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, template) if template is None or not os.access(path, os.R_OK): # template not found in new BibFormat. Call old one if CFG_PATH_PHP: return call_old_bibformat(recID, format=of, on_the_fly=True) ############################# END ################################## error = get_msgs_for_code_list([("ERR_BIBFORMAT_NO_TEMPLATE_FOUND", of)], stream='error', ln=cdslang) errors_.append(error) if verbose == 0: register_errors(error, 'error') elif verbose > 5: return error[0][1] return "" #Format with template (out, errors) = format_with_format_template(template, bfo, verbose) errors_.extend(errors) return out def decide_format_template(bfo, of): """ Returns the format template name that should be used for formatting given output format and BibFormatObject. Look at of rules, and take the first matching one. 
If no rule matches, returns None To match we ignore lettercase and spaces before and after value of rule and value of record @param bfo a BibFormatObject @param of the code of the output format to use """ output_format = get_output_format(of) for rule in output_format['rules']: value = bfo.field(rule['field']).strip()#Remove spaces pattern = rule['value'].strip() #Remove spaces if re.match(pattern, value, re.IGNORECASE) is not None: return rule['template'] template = output_format['default'] if template != '': return template else: return None def format_with_format_template(format_template_filename, bfo, verbose=0, format_template_code=None): """ Format a record given a format template. Also returns errors Returns a formatted version of the record represented by bfo, in the language specified in bfo, and with the specified format template. Parameter format_template_filename will be ignored if format_template_code is provided. This allows to preview format code without having to save file on disk @param format_template_filename the dilename of a format template @param bfo the object containing parameters for the current formatting @param format_template_code if not empty, use code as template instead of reading format_template_filename (used for previews) @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (formatted text, errors) """ errors_ = [] if format_template_code is not None: format_content = str(format_template_code) else: format_content = get_format_template(format_template_filename)['code'] localized_format = filter_languages(format_content, bfo.lang) (evaluated_format, errors) = eval_format_template_elements(localized_format, bfo, verbose) errors_ = errors return (evaluated_format, errors) def eval_format_template_elements(format_template, bfo, verbose=0): """ Evalutes the format elements of the given template and replace each element with its 
value. Also returns errors. Prepare the format template content so that we can directly replace the marc code by their value. This implies: 1) Look for special tags 2) replace special tags by their evaluation @param format_template the format template code @param bfo the object containing parameters for the current formatting @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (result, errors) """ errors_ = [] # First define insert_element_code(match), used in re.sub() function def insert_element_code(match): """ Analyses 'match', interpret the corresponding code, and return the result of the evaluation. Called by substitution in 'eval_format_template_elements(...)' @param match a match object corresponding to the special tag that must be interpreted """ function_name = match.group("function_name") format_element = get_format_element(function_name, verbose) params = {} # Look for function parameters given in format template code all_params = match.group('params') if all_params is not None: function_params_iterator = pattern_function_params.finditer(all_params) for param_match in function_params_iterator: name = param_match.group('param') value = param_match.group('value') params[name] = value # Evaluate element with params and return (Do not return errors) (result, errors) = eval_format_element(format_element, bfo, params, verbose) errors_ = errors return result # Substitute special tags in the format by our own text. # Special tags have the form format = pattern_tag.sub(insert_element_code, format_template) return (format, errors_) def eval_format_element(format_element, bfo, parameters={}, verbose=0): """ Returns the result of the evaluation of the given format element name, with given BibFormatObject and parameters. Also returns the errors of the evaluation. 
@param format_element a format element structure as returned by get_format_element @param bfo a BibFormatObject used for formatting @param parameters a dict of parameters to be used for formatting. Key is parameter and value is value of parameter @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return tuple (result, errors) """ errors = [] #Load special values given as parameters prefix = parameters.get('prefix', "") suffix = parameters.get('suffix', "") default_value = parameters.get('default', "") - escape = parameters.get('escape', "0") + escape = parameters.get('escape', "") # 3 possible cases: # a) format element file is found: we execute it # b) format element file is not found, but exist in tag table (e.g. bfe_isbn) # c) format element is totally unknown. Do nothing or report error if format_element is not None and format_element['type'] == "python": # a) We found an element with the tag name, of type "python" # Prepare a dict 'params' to pass as parameter to 'format' # function of element params = {} # Look for parameters defined in format element # Fill them with specified default values and values # given as parameters for param in format_element['attrs']['params']: name = param['name'] default = param['default'] params[name] = parameters.get(name, default) # Add BibFormatObject params['bfo'] = bfo # Execute function with given parameters and return result. 
- output_text = "" function = format_element['code'] try: output_text = apply(function, (), params) except Exception, e: - output_text = "" name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT", name, str(params)) errors.append(error) if verbose == 0: register_errors(errors, 'error') elif verbose >=5: tb = sys.exc_info()[2] error_string = get_msgs_for_code_list(error, stream='error', ln=cdslang) stack = traceback.format_exception(Exception, e, tb, limit=None) output_text = ''+ \ - error_string[0][1] + "".join(stack) +' ' - + str(error_string[0][1]) + "".join(stack) +' ' + # None can be returned when evaluating function if output_text is None: output_text = "" else: output_text = str(output_text) - # If escape is equal to 1, then escape all + + # Escaping: + # (1) By default, everything is escaped in mode 1 + # (2) If evaluated element has 'escape_values()' function, use + # its returned value as escape mode, and override (1) + # (3) If template has a defined parameter (in allowed values), + # use it, and override (1) and (2) + + # (1) + escape_mode = 1 + + # (2) + escape_function = format_element['escape_function'] + if escape_function is not None: + try: + escape_mode = apply(escape_function, (), {'bfo': bfo}) + except Exception, e: + error = ("ERR_BIBFORMAT_EVALUATING_ELEMENT_ESCAPE", name) + errors.append(error) + if verbose == 0: + register_errors(errors, 'error') + elif verbose >=5: + tb = sys.exc_info()[2] + error_string = get_msgs_for_code_list(error, + stream='error', + ln=cdslang) + output_text += ''+ \ + str(error_string[0][1]) +' ' + # (3) + if escape in ['0', '1']: + escape_mode = int(escape) + + #If escape is equal to 1, then escape all # HTML reserved chars. 
- if escape == '1': + if escape_mode == 1: output_text = cgi.escape(output_text) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty if output_text.strip() != "": output_text = prefix + output_text + suffix # Add the default value if output_text is empty if output_text == "": output_text = default_value return (output_text, errors) elif format_element is not None and format_element['type'] == "field": # b) We have not found an element in files that has the tag # name. Then look for it in the table "tag" # # # # Load special values given as parameters separator = parameters.get('separator ', "") nbMax = parameters.get('nbMax', "") - escape = parameters.get('escape', "0") + escape = parameters.get('escape', "1") # By default, escape here # Get the fields tags that have to be printed tags = format_element['attrs']['tags'] output_text = [] # Get values corresponding to tags for tag in tags: p_tag = parse_tag(tag) values = record_get_field_values(bfo.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) if len(values)>0 and isinstance(values[0], dict): #flatten dict to its values only values_list = map(lambda x: x.values(), values) #output_text.extend(values) for values in values_list: output_text.extend(values) else: output_text.extend(values) if nbMax != "": try: nbMax = int(nbMax) output_text = output_text[:nbMax] except: name = format_element['attrs']['name'] error = ("ERR_BIBFORMAT_NBMAX_NOT_INT", name) errors.append(error) if verbose < 5: register_errors(error, 'error') elif verbose >=5: error_string = get_msgs_for_code_list(error, stream='error', ln=cdslang) output_text = output_text.append(error_string[0][1]) # Add prefix and suffix if they have been given as parameters and if # the evaluation of element is not empty. # If evaluation is empty string, return default value if it exists. 
# Else return empty string if ("".join(output_text)).strip() != "": # If escape is equal to 1, then escape all # HTML reserved chars. if escape == '1': output_text = cgi.escape(separator.join(output_text)) else: output_text = separator.join(output_text) output_text = prefix + output_text + suffix else: #Return default value output_text = default_value return (output_text, errors) else: # c) Element is unknown error = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_ELEMENT_NAME", format_element)], stream='error', ln=cdslang) errors.append(error) if verbose < 5: register_errors(error, 'error') return ("", errors) elif verbose >=5: if verbose >= 9: sys.exit(error[0][1]) return ('' + \ error[0][1]+'', errors) def filter_languages(format_template, ln='en'): """ Filters the language tags that do not correspond to the specified language. @param format_template the format template code @param ln the language that is NOT filtered out from the template @return the format template with unnecessary languages filtered out """ # First define search_lang_tag(match) and clean_language_tag(match), used # in re.sub() function def search_lang_tag(match): """ Searches for the ... tag and remove inner localized tags such as , , that are not current_lang. If current_lang cannot be found inside ... , try to use 'cdslang' @param match a match object corresponding to the special tag that must be interpreted """ current_lang = ln def clean_language_tag(match): """ Return tag text content if tag language of match is output language. Called by substitution in 'filter_languages(...)' @param match a match object corresponding to the special tag that must be interpreted """ if match.group(1) == current_lang: return match.group(2) else: return "" # End of clean_language_tag lang_tag_content = match.group("langs") # Try to find tag with current lang. 
If it does not exists, # then current_lang becomes cdslang until the end of this # replace pattern_current_lang = re.compile(r"<"+current_lang+ \ "\s*>(.*?)") if re.search(pattern_current_lang, lang_tag_content) is None: current_lang = cdslang cleaned_lang_tag = ln_pattern.sub(clean_language_tag, lang_tag_content) return cleaned_lang_tag # End of search_lang_tag filtered_format_template = pattern_lang.sub(search_lang_tag, format_template) return filtered_format_template def get_format_template(filename, with_attributes=False): """ Returns the structured content of the given formate template. if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in dictionary (it might, if it has already been loaded previously) {'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} } @param filename the filename of an format template @param with_attributes if True, fetch the attributes (names and description) for format' @return strucured content of format template """ # Get from cache whenever possible global format_templates_cache if not filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION): return None if format_templates_cache.has_key(filename): # If we must return with attributes and template exist in # cache with attributes then return cache. 
# Else reload with attributes if with_attributes and \ format_templates_cache[filename].has_key('attrs'): return format_templates_cache[filename] format_template = {'code':""} try: path = "%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename) format_file = open(path) format_content = format_file.read() format_file.close() # Load format template code # Remove name and description code_and_description = pattern_format_template_name.sub("", format_content) code = pattern_format_template_desc.sub("", code_and_description) # Escape % chars in code (because we will use python # formatting capabilities) format_template['code'] = code except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], stream='error', ln=cdslang) register_errors(errors, 'error') # Save attributes if necessary if with_attributes: format_template['attrs'] = get_format_template_attrs(filename) # Cache and return format_templates_cache[filename] = format_template return format_template def get_format_templates(with_attributes=False): """ Returns the list of all format templates, as dictionary with filenames as keys if 'with_attributes' is true, returns the name and description. Else 'attrs' is not returned as key in each dictionary (it might, if it has already been loaded previously) [{'code':"Some template code" 'attrs': {'name': "a name", 'description': "a description"} }, ... 
} @param with_attributes if True, fetch the attributes (names and description) for formats """ format_templates = {} files = os.listdir(CFG_BIBFORMAT_TEMPLATES_PATH) for filename in files: if filename.endswith("."+CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION): format_templates[filename] = get_format_template(filename, with_attributes) return format_templates def get_format_template_attrs(filename): """ Returns the attributes of the format template with given filename The attributes are {'name', 'description'} Caution: the function does not check that path exists or that the format element is valid. @param the path to a format element """ attrs = {} attrs['name'] = "" attrs['description'] = "" try: template_file = open("%s%s%s" % (CFG_BIBFORMAT_TEMPLATES_PATH, os.sep, filename)) code = template_file.read() template_file.close() match = pattern_format_template_name.search(code) if match is not None: attrs['name'] = match.group('name') else: attrs['name'] = filename match = pattern_format_template_desc.search(code) if match is not None: attrs['description'] = match.group('desc').rstrip('.') except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_TEMPLATE_FILE", filename, str(e))], stream='error', ln=cdslang) register_errors(errors, 'error') attrs['name'] = filename return attrs def get_format_element(element_name, verbose=0, with_built_in_params=False): """ Returns the format element structured content. Return None if element cannot be loaded (file not found, not readable or invalid) The returned structure is {'attrs': {some attributes in dict. 
See get_format_element_attrs_from_*} 'code': the_function_code, - 'type':"field" or "python" depending if element is defined in file or table} + 'type':"field" or "python" depending if element is defined in file or table, + 'escape_function': the function to call to know if element output must be escaped} @param element_name the name of the format element to load @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @param with_built_in_params if True, load the parameters built in all elements @return a dictionary with format element attributes """ # Get from cache whenever possible global format_elements_cache # Resolve filename and prepare 'name' as key for the cache filename = resolve_format_element_filename(element_name) if filename is not None: name = filename.upper() else: name = element_name.upper() if format_elements_cache.has_key(name): element = format_elements_cache[name] if not with_built_in_params or \ (with_built_in_params and \ element['attrs'].has_key('builtin_params')): return element if filename is None: # Element is maybe in tag table if bibformat_dblayer.tag_exists_for_name(element_name): format_element = {'attrs': get_format_element_attrs_from_table( \ element_name, with_built_in_params), 'code':None, + 'escape_function':None, 'type':"field"} # Cache and returns format_elements_cache[name] = format_element return format_element else: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND", element_name)], stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >=5: sys.stderr.write(errors[0][1]) return None else: format_element = {} module_name = filename if module_name.endswith(".py"): module_name = module_name[:-3] try: module = __import__(CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH + \ "." + module_name) # Load last module in import path # For eg. 
load bibformat_elements in # invenio.elements.bibformat_element # Used to keep flexibility regarding where elements # directory is (for eg. test cases) components = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH.split(".") for comp in components[1:]: module = getattr(module, comp) function_format = module.__dict__[module_name].format - + function_escape = getattr(module.__dict__[module_name], 'escape_values', None) format_element['code'] = function_format + format_element['escape_function'] = function_escape format_element['attrs'] = get_format_element_attrs_from_function( \ function_format, element_name, with_built_in_params) format_element['type'] = "python" # Cache and return format_elements_cache[name] = format_element return format_element except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_FORMAT_ELEMENT_NOT_FOUND", element_name)], stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(str(e)) sys.stderr.write(errors[0][1]) if verbose >= 7: raise e return None def get_format_elements(with_built_in_params=False): """ Returns the list of format elements attributes as dictionary structure Elements declared in files have priority over element declared in 'tag' table The returned object has this format: {element_name1: {'attrs': {'description':..., 'seealso':... 'params':[{'name':..., 'default':..., 'description':...}, ...] 'builtin_params':[{'name':..., 'default':..., 'description':...}, ...] 
}, 'code': code_of_the_element }, element_name2: {...}, ...} Returns only elements that could be loaded (not error in code) @return a dict of format elements with name as key, and a dict as attributes @param with_built_in_params if True, load the parameters built in all elements """ format_elements = {} mappings = bibformat_dblayer.get_all_name_tag_mappings() for name in mappings: format_elements[name.upper().replace(" ", "_").strip()] = get_format_element(name, with_built_in_params=with_built_in_params) files = os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH) for filename in files: filename_test = filename.upper().replace(" ", "_") if filename_test.endswith(".PY") and filename.upper() != "__INIT__.PY": if filename_test.startswith("BFE_"): filename_test = filename_test[4:] element_name = filename_test[:-3] element = get_format_element(element_name, with_built_in_params=with_built_in_params) if element is not None: format_elements[element_name] = element return format_elements def get_format_element_attrs_from_function(function, element_name, with_built_in_params=False): """ Returns the attributes of the function given as parameter. It looks for standard parameters of the function, default values and comments in the docstring. The attributes are {'description', 'seealso':['element.py', ...], 'params':{name:{'name', 'default', 'description'}, ...], name2:{}} The attributes are {'name' : "name of element" #basically the name of 'name' parameter 'description': "a string description of the element", 'seealso' : ["element_1.py", "element_2.py", ...] 
#a list of related elements 'params': [{'name':"param_name", #a list of parameters for this element (except 'bfo') 'default':"default value", 'description': "a description"}, ...], 'builtin_params': {name: {'name':"param_name",#the parameters builtin for all elem of this kind 'default':"default value", 'description': "a description"}, ...}, } @param function the formatting function of a format element @param element_name the name of the element @param with_built_in_params if True, load the parameters built in all elements """ attrs = {} attrs['description'] = "" attrs['name'] = element_name.replace(" ", "_").upper() attrs['seealso'] = [] docstring = function.__doc__ if isinstance(docstring, str): # Look for function description in docstring #match = pattern_format_element_desc.search(docstring) description = docstring.split("@param")[0] description = description.split("@see")[0] attrs['description'] = description.strip().rstrip('.') # Look for @see in docstring match = pattern_format_element_seealso.search(docstring) if match is not None: elements = match.group('see').rstrip('.').split(",") for element in elements: attrs['seealso'].append(element.strip()) params = {} # Look for parameters in function definition (args, varargs, varkw, defaults) = inspect.getargspec(function) # Prepare args and defaults_list such that we can have a mapping # from args to defaults args.reverse() if defaults is not None: defaults_list = list(defaults) defaults_list.reverse() else: defaults_list = [] for arg, default in map(None, args, defaults_list): if arg == "bfo": #Don't keep this as parameter. 
It is hidden to users, and #exists in all elements of this kind continue param = {} param['name'] = arg if default is None: #In case no check is made inside element, we prefer to #print "" (nothing) than None in output param['default'] = "" else: param['default'] = default param['description'] = "(no description provided)" params[arg] = param if isinstance(docstring, str): # Look for @param descriptions in docstring. # Add description to existing parameters in params dict params_iterator = pattern_format_element_params.finditer(docstring) for match in params_iterator: name = match.group('name') if params.has_key(name): params[name]['description'] = match.group('desc').rstrip('.') attrs['params'] = params.values() # Load built-in parameters if necessary if with_built_in_params: builtin_params = [] # Add 'prefix' parameter param_prefix = {} param_prefix['name'] = "prefix" param_prefix['default'] = "" param_prefix['description'] = """A prefix printed only if the record has a value for this element""" builtin_params.append(param_prefix) # Add 'suffix' parameter param_suffix = {} param_suffix['name'] = "suffix" param_suffix['default'] = "" param_suffix['description'] = """A suffix printed only if the record has a value for this element""" builtin_params.append(param_suffix) # Add 'default' parameter param_default = {} param_default['name'] = "default" param_default['default'] = "" param_default['description'] = """A default value printed if the record has no value for this element""" builtin_params.append(param_default) # Add 'escape' parameter param_escape = {} param_escape['name'] = "escape" - param_escape['default'] = "0" + param_escape['default'] = "" param_escape['description'] = """If set to 1, replaces special characters '&', '<' and '>' of this element by SGML entities""" builtin_params.append(param_escape) attrs['builtin_params'] = builtin_params return attrs def get_format_element_attrs_from_table(element_name, with_built_in_params=False): """ Returns the 
attributes of the format element with given name in 'tag' table. Returns None if element_name does not exist in tag table. The attributes are {'name' : "name of element" #basically the name of 'element_name' parameter 'description': "a string description of the element", 'seealso' : [] #a list of related elements. Always empty in this case 'params': [], #a list of parameters for this element. Always empty in this case 'builtin_params': [{'name':"param_name", #the parameters builtin for all elem of this kind 'default':"default value", 'description': "a description"}, ...], 'tags':["950.1", 203.a] #the list of tags printed by this element } @param element_name an element name in database @param element_name the name of the element @param with_built_in_params if True, load the parameters built in all elements """ attrs = {} tags = bibformat_dblayer.get_tags_from_name(element_name) field_label = "field" if len(tags)>1: field_label = "fields" attrs['description'] = "Prints %s %s of the record" % (field_label, ", ".join(tags)) attrs['name'] = element_name.replace(" ", "_").upper() attrs['seealso'] = [] attrs['params'] = [] attrs['tags'] = tags # Load built-in parameters if necessary if with_built_in_params: builtin_params = [] # Add 'prefix' parameter param_prefix = {} param_prefix['name'] = "prefix" param_prefix['default'] = "" param_prefix['description'] = """A prefix printed only if the record has a value for this element""" builtin_params.append(param_prefix) # Add 'suffix' parameter param_suffix = {} param_suffix['name'] = "suffix" param_suffix['default'] = "" param_suffix['description'] = """A suffix printed only if the record has a value for this element""" builtin_params.append(param_suffix) # Add 'separator' parameter param_separator = {} param_separator['name'] = "separator" param_separator['default'] = " " param_separator['description'] = """A separator between elements of the field""" builtin_params.append(param_separator) # Add 'nbMax' parameter param_nbMax 
= {} param_nbMax['name'] = "nbMax" param_nbMax['default'] = "" param_nbMax['description'] = """The maximum number of values to print for this element. No limit if not specified""" builtin_params.append(param_nbMax) # Add 'default' parameter param_default = {} param_default['name'] = "default" param_default['default'] = "" param_default['description'] = """A default value printed if the record has no value for this element""" builtin_params.append(param_default) # Add 'escape' parameter param_escape = {} param_escape['name'] = "escape" - param_escape['default'] = "0" + param_escape['default'] = "" param_escape['description'] = """If set to 1, replaces special characters '&', '<' and '>' of this element by SGML entities""" builtin_params.append(param_escape) attrs['builtin_params'] = builtin_params return attrs def get_output_format(code, with_attributes=False, verbose=0): """ Returns the structured content of the given output format If 'with_attributes' is true, also returns the names and description of the output formats, else 'attrs' is not returned in dict (it might, if it has already been loaded previously). if output format corresponding to 'code' is not found return an empty structure. 
See get_output_format_attrs() to learn more on the attributes {'rules': [ {'field': "980__a", 'value': "PREPRINT", 'template': "filename_a.bft", }, {...} ], 'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1", 'content_type': "application/ms-excel" } 'default':"filename_b.bft" } @param code the code of an output_format @param with_attributes if True, fetch the attributes (names and description) for format @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return strucured content of output format """ output_format = {'rules':[], 'default':""} filename = resolve_output_format_filename(code, verbose) if filename is None: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_OUTPUT_FORMAT_CODE_UNKNOWN", code)], stream='error', ln=cdslang) register_errors(errors, 'error') if with_attributes: #Create empty attrs if asked for attributes output_format['attrs'] = get_output_format_attrs(code, verbose) return output_format # Get from cache whenever possible global format_outputs_cache if format_outputs_cache.has_key(filename): # If was must return with attributes but cache has not # attributes, then load attributes if with_attributes and not \ format_outputs_cache[filename].has_key('attrs'): format_outputs_cache[filename]['attrs'] = get_output_format_attrs(code, verbose) return format_outputs_cache[filename] try: if with_attributes: output_format['attrs'] = get_output_format_attrs(code, verbose) path = "%s%s%s" % (CFG_BIBFORMAT_OUTPUTS_PATH, os.sep, filename ) format_file = open(path) current_tag = '' for line in format_file: line = line.strip() if line == "": # Ignore blank lines continue if line.endswith(":"): # Retrieve tag # Remove : spaces and eol at the end of line clean_line = line.rstrip(": \n\r") # The tag starts at second position current_tag = 
"".join(clean_line.split()[1:]).strip() elif line.find('---') != -1: words = line.split('---') template = words[-1].strip() condition = ''.join(words[:-1]) value = "" output_format['rules'].append({'field': current_tag, 'value': condition, 'template': template, }) elif line.find(':') != -1: # Default case default = line.split(':')[1].strip() output_format['default'] = default except Exception, e: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_READ_OUTPUT_FILE", filename, str(e))], stream='error', ln=cdslang) register_errors(errors, 'error') # Cache and return format_outputs_cache[filename] = output_format return output_format def get_output_format_attrs(code, verbose=0): """ Returns the attributes of an output format. The attributes contain 'code', which is the short identifier of the output format (to be given as parameter in format_record function to specify the output format), 'description', a description of the output format, and 'names', the localized names of the output format. If 'content_type' is specified then the search_engine will send a file with this content type and with result of formatting as content to the user. The 'names' dict always contais 'generic', 'ln' (for long name) and 'sn' (for short names) keys. 'generic' is the default name for output format. 'ln' and 'sn' contain long and short localized names of the output format. Only the languages for which a localization exist are used. 
{'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1", 'content_type': "application/ms-excel" } @param code the short identifier of the format @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return strucured content of output format attributes """ if code.endswith("."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION): code = code[:-(len(CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION) + 1)] attrs = {'names':{'generic':"", 'ln':{}, 'sn':{}}, 'description':'', 'code':code.upper(), 'content_type':""} filename = resolve_output_format_filename(code, verbose) if filename is None: return attrs attrs['names'] = bibformat_dblayer.get_output_format_names(code) attrs['description'] = bibformat_dblayer.get_output_format_description(code) attrs['content_type'] = bibformat_dblayer.get_output_format_content_type(code) return attrs def get_output_formats(with_attributes=False): """ Returns the list of all output format, as a dictionary with their filename as key If 'with_attributes' is true, also returns the names and description of the output formats, else 'attrs' is not returned in dicts (it might, if it has already been loaded previously). See get_output_format_attrs() to learn more on the attributes {'filename_1.bfo': {'rules': [ {'field': "980__a", 'value': "PREPRINT", 'template': "filename_a.bft", }, {...} ], 'attrs': {'names': {'generic':"a name", 'sn':{'en': "a name", 'fr':"un nom"}, 'ln':{'en':"a long name"}} 'description': "a description" 'code': "fnm1" } 'default':"filename_b.bft" }, 'filename_2.bfo': {...}, ... 
} @return the list of output formats """ output_formats = {} files = os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH) for filename in files: if filename.endswith("."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION): code = "".join(filename.split(".")[:-1]) output_formats[filename] = get_output_format(code, with_attributes) return output_formats def get_kb_mapping(kb, string, default=""): """ Returns the value of the string' in the knowledge base 'kb'. If kb does not exist or string does not exist in kb, returns 'default' string value. @param kb a knowledge base name @param string a key in a knowledge base @param default a default value if 'string' is not in 'kb' @return the value corresponding to the given string in given kb """ global kb_mappings_cache if kb_mappings_cache.has_key(kb): kb_cache = kb_mappings_cache[kb] if kb_cache.has_key(string): value = kb_mappings_cache[kb][string] if value is None: return default else: return value else: # Precreate for caching this kb kb_mappings_cache[kb] = {} value = bibformat_dblayer.get_kb_mapping_value(kb, string) kb_mappings_cache[kb][str(string)] = value if value is None: return default else: return value def resolve_format_element_filename(string): """ Returns the filename of element corresponding to string This is necessary since format templates code call elements by ignoring case, for eg. is the same as . It is also recommended that format elements filenames are prefixed with bfe_ . We need to look for these too. The name of the element has to start with "BFE_". 
@param name a name for a format element @return the corresponding filename, with right case """ if not string.endswith(".py"): name = string.replace(" ", "_").upper() +".PY" else: name = string.replace(" ", "_").upper() files = os.listdir(CFG_BIBFORMAT_ELEMENTS_PATH) for filename in files: test_filename = filename.replace(" ", "_").upper() if test_filename == name or \ test_filename == "BFE_" + name or \ "BFE_" + test_filename == name: return filename # No element with that name found # Do not log error, as it might be a normal execution case: # element can be in database return None def resolve_output_format_filename(code, verbose=0): """ Returns the filename of output corresponding to code This is necessary since output formats names are not case sensitive but most file systems are. @param code the code for an output format @param verbose the level of verbosity from 0 to 9 (O: silent, 5: errors, 7: errors and warnings, 9: errors and warnings, stop if error (debug mode )) @return the corresponding filename, with right case, or None if not found """ #Remove non alphanumeric chars (except .) code = re.sub(r"[^.0-9a-zA-Z]", "", code) if not code.endswith("."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION): code = re.sub(r"\W", "", code) code += "."+CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION files = os.listdir(CFG_BIBFORMAT_OUTPUTS_PATH) for filename in files: if filename.upper() == code.upper(): return filename # No output format with that name found errors = get_msgs_for_code_list([("ERR_BIBFORMAT_CANNOT_RESOLVE_OUTPUT_NAME", code)], stream='error', ln=cdslang) if verbose == 0: register_errors(errors, 'error') elif verbose >= 5: sys.stderr.write(errors[0][1]) if verbose >= 9: sys.exit(errors[0][1]) return None def get_fresh_format_template_filename(name): """ Returns a new filename and name for template with given name. 
Used when writing a new template to a file, so that the name has no space, is unique in template directory Returns (unique_filename, modified_name) @param a name for a format template @return the corresponding filename, and modified name if necessary """ #name = re.sub(r"\W", "", name) #Remove non alphanumeric chars name = name.replace(" ", "_") filename = name # Remove non alphanumeric chars (except .) filename = re.sub(r"[^.0-9a-zA-Z]", "", filename) path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION index = 1 while os.path.exists(path): index += 1 filename = name + str(index) path = CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION if index > 1: returned_name = (name + str(index)).replace("_", " ") else: returned_name = name.replace("_", " ") return (filename + "." + CFG_BIBFORMAT_FORMAT_TEMPLATE_EXTENSION, returned_name) #filename.replace("_", " ")) def get_fresh_output_format_filename(code): """ Returns a new filename for output format with given code. Used when writing a new output format to a file, so that the code has no space, is unique in output format directory. The filename also need to be at most 6 chars long, as the convention is that filename == output format code (+ .extension) We return an uppercase code Returns (unique_filename, modified_code) @param code the code of an output format @return the corresponding filename, and modified code if necessary """ #code = re.sub(r"\W", "", code) #Remove non alphanumeric chars code = code.upper().replace(" ", "_") # Remove non alphanumeric chars (except .) code = re.sub(r"[^.0-9a-zA-Z]", "", code) if len(code) > 6: code = code[:6] filename = code path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \ + "." 
+ CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION index = 2 while os.path.exists(path): filename = code + str(index) if len(filename) > 6: filename = code[:-(len(str(index)))]+str(index) index += 1 path = CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename \ + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION # We should not try more than 99999... Well I don't see how we # could get there.. Sanity check. if index >= 99999: errors = get_msgs_for_code_list([("ERR_BIBFORMAT_NB_OUTPUTS_LIMIT_REACHED", code)], stream='error', ln=cdslang) register_errors(errors, 'error') sys.exit("Output format cannot be named as %s"%code) return (filename + "." + CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION, filename) def clear_caches(): """ Clear the caches (Output Format, Format Templates and Format Elements) """ global format_templates_cache, format_elements_cache , \ format_outputs_cache, kb_mappings_cache format_templates_cache = {} format_elements_cache = {} format_outputs_cache = {} kb_mappings_cache = {} class BibFormatObject: """ An object that encapsulates a record and associated methods, and that is given as parameter to all format elements 'format' function. The object is made specifically for a given formatting, i.e. it includes for example the language for the formatting. The object provides basic accessors to the record. For full access, one can get the record with get_record() and then use BibRecord methods on the returned object. """ # The record record = None # The language in which the formatting has to be done lang = cdslang # A list of string describing the context in which the record has # to be formatted. # It represents the words of the user request in web interface search search_pattern = [] # The id of the record recID = 0 # The user id of the person who will view the formatted page (if applicable) # This allows for example to print a "edit record" link for people # who have right to edit a record. 
uid = None def __init__(self, recID, ln=cdslang, search_pattern=[], xml_record=None, uid=None): """ Creates a new bibformat object, with given record. You can either specify an record ID to format, or give its xml representation. if 'xml_record' is not None, use 'xml_record' instead of recID for the record. 'uid' allows to grant access to some functionalities on a page depending on the user's priviledges. @param recID the id of a record @param ln the language in which the record has to be formatted @param search_pattern list of string representing the request used by the user in web interface @param xml_record a xml string of the record to format @param uid the user id of the person who will view the formatted page """ if xml_record is not None: # If record is given as parameter self.record = create_record(xml_record)[0] recID = record_get_field_value(self.record,"001") self.lang = wash_language(ln) self.search_pattern = search_pattern self.recID = recID self.uid = uid def get_record(self): """ Returns the record of this BibFormatObject instance @return the record structure as returned by BibRecord """ # Create record if necessary if self.record is None: record = create_record(record_get_xml(self.recID, 'xm')) self.record = record[0] return self.record def control_field(self, tag): """ Returns the value of control field given by tag in record @param record the record to retrieve values from @param tag the marc code of a field @return value of field tag in record """ if self.get_record() is None: #Case where BibRecord could not parse object return '' p_tag = parse_tag(tag) return record_get_field_value(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) def field(self, tag): """ Returns the value of the field corresponding to tag in the current record. 
if the value does not exist, return empty string @param record the record to retrieve values from @param tag the marc code of a field @return value of field tag in record """ list_of_fields = self.fields(tag) if len(list_of_fields) > 0: return list_of_fields[0] else: return "" def fields(self, tag): """ Returns the list of values corresonding to "tag". If tag has an undefined subcode (such as 999C5), the function returns a list of dictionaries, whoose keys are the subcodes and the values are the values of tag.subcode. If the tag has a subcode, simply returns list of values corresponding to tag. @param record the record to retrieve values from @param tag the marc code of a field @return values of field tag in record """ if self.get_record() is None: # Case where BibRecord could not parse object return [] p_tag = parse_tag(tag) if p_tag[3] != "": # Subcode has been defined. Simply returns list of values return record_get_field_values(self.get_record(), p_tag[0], p_tag[1], p_tag[2], p_tag[3]) else: # Subcode is undefined. Returns list of dicts. # However it might be the case of a control field. list_of_dicts = [] instances = record_get_field_instances(self.get_record(), p_tag[0], p_tag[1], p_tag[2]) for instance in instances: instance_dict = dict(instance[0]) list_of_dicts.append(instance_dict) return list_of_dicts def kb(self, kb, string, default=""): """ Returns the value of the "string" in the knowledge base "kb". If kb does not exist or string does not exist in kb, returns 'default' string or empty string if not specified. 
@param kb a knowledge base name @param string the string we want to translate @param default a default value returned if 'string' not found in 'kb' """ if string is None: return default val = get_kb_mapping(kb, string, default) if val is None: return default else: return val def bf_profile(): """ Runs a benchmark """ for i in range(50): format_record(i, "HD", ln=cdslang, verbose=9, search_pattern=[]) return if __name__ == "__main__": import profile import pstats bf_profile() profile.run('bf_profile()', "bibformat_profile") p = pstats.Stats("bibformat_profile") p.strip_dirs().sort_stats("cumulative").print_stats() diff --git a/modules/bibformat/lib/bibformat_engine_tests.py b/modules/bibformat/lib/bibformat_engine_tests.py index e924f0030..c3265f1c8 100644 --- a/modules/bibformat/lib/bibformat_engine_tests.py +++ b/modules/bibformat/lib/bibformat_engine_tests.py @@ -1,685 +1,685 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """Test cases for the BibFormat engine. 
Also test some utilities function in bibformat_utils module""" __revision__ = "$Id$" # pylint: disable-msg=C0301 import unittest import os import sys from invenio import bibformat_engine from invenio import bibformat_utils from invenio import bibformat_config from invenio import bibrecord from invenio.config import tmpdir #CFG_BIBFORMAT_OUTPUTS_PATH = "..%setc%soutput_formats" % (os.sep, os.sep) #CFG_BIBFORMAT_TEMPLATES_PATH = "..%setc%sformat_templates" % (os.sep, os.sep) #CFG_BIBFORMAT_ELEMENTS_PATH = "elements" CFG_BIBFORMAT_OUTPUTS_PATH = "%s" % (tmpdir) CFG_BIBFORMAT_TEMPLATES_PATH = "%s" % (tmpdir) CFG_BIBFORMAT_ELEMENTS_PATH = "%s%stests_bibformat_elements" % (tmpdir, os.sep) CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = "tests_bibformat_elements" class FormatTemplateTest(unittest.TestCase): """ bibformat - tests on format templates""" def test_get_format_template(self): """bibformat - format template parsing and returned structure""" bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH = CFG_BIBFORMAT_TEMPLATES_PATH #Test correct parsing and structure template_1 = bibformat_engine.get_format_template("Test1.bft", with_attributes=True) self.assert_(template_1 is not None) self.assertEqual(template_1['code'], "test") self.assertEqual(template_1['attrs']['name'], "name_test") self.assertEqual(template_1['attrs']['description'], "desc_test") #Test correct parsing and structure of file without description or name template_2 = bibformat_engine.get_format_template("Test_2.bft", with_attributes=True) self.assert_(template_2 is not None) self.assertEqual(template_2['code'], "test") self.assertEqual(template_2['attrs']['name'], "Test_2.bft") self.assertEqual(template_2['attrs']['description'], "") #Test correct parsing and structure of file without description or name unknown_template = bibformat_engine.get_format_template("test_no_template.test", with_attributes=True) self.assertEqual(unknown_template, None) def test_get_format_templates(self): """ bibformat - loading multiple 
format templates""" bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH = CFG_BIBFORMAT_TEMPLATES_PATH templates = bibformat_engine.get_format_templates(with_attributes=True) #test correct loading self.assert_("Test1.bft" in templates.keys()) self.assert_("Test_2.bft" in templates.keys()) self.assert_("Test3.bft" in templates.keys()) self.assert_("Test_no_template.test" not in templates.keys()) #Test correct pasrsing and structure self.assertEqual(templates['Test1.bft']['code'], "test") self.assertEqual(templates['Test1.bft']['attrs']['name'], "name_test") self.assertEqual(templates['Test1.bft']['attrs']['description'], "desc_test") def test_get_format_template_attrs(self): """ bibformat - correct parsing of attributes in format template""" bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH = CFG_BIBFORMAT_TEMPLATES_PATH attrs = bibformat_engine.get_format_template_attrs("Test1.bft") self.assertEqual(attrs['name'], "name_test") self.assertEqual(attrs['description'], "desc_test") def test_get_fresh_format_template_filename(self): """ bibformat - getting fresh filename for format template""" bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH = CFG_BIBFORMAT_TEMPLATES_PATH filename_and_name_1 = bibformat_engine.get_fresh_format_template_filename("Test") self.assert_(len(filename_and_name_1) >= 2) self.assertEqual(filename_and_name_1[0], "Test.bft") filename_and_name_2 = bibformat_engine.get_fresh_format_template_filename("Test1") self.assert_(len(filename_and_name_2) >= 2) self.assert_(filename_and_name_2[0] != "Test1.bft") path = bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH + os.sep + filename_and_name_2[0] self.assert_(not os.path.exists(path)) class FormatElementTest(unittest.TestCase): """ bibformat - tests on format templates""" def setUp(self): # pylint: disable-msg=C0103 """bibformat - setting python path to test elements""" sys.path.append('%s' % tmpdir) def test_resolve_format_element_filename(self): """bibformat - resolving format elements filename """ 
bibformat_engine.CFG_BIBFORMAT_ELEMENTS_PATH = CFG_BIBFORMAT_ELEMENTS_PATH #Test elements filename starting without bfe_, with underscore instead of space filenames = ["test 1", "test 1.py", "bfe_test 1", "bfe_test 1.py", "BFE_test 1", "BFE_TEST 1", "BFE_TEST 1.py", "BFE_TeST 1.py", "BFE_TeST 1", "BfE_TeST 1.py", "BfE_TeST 1","test_1", "test_1.py", "bfe_test_1", "bfe_test_1.py", "BFE_test_1", "BFE_TEST_1", "BFE_TEST_1.py", "BFE_Test_1.py", "BFE_TeST_1", "BfE_TeST_1.py", "BfE_TeST_1"] for i in range(len(filenames)-2): filename_1 = bibformat_engine.resolve_format_element_filename(filenames[i]) self.assert_(filename_1 is not None) filename_2 = bibformat_engine.resolve_format_element_filename(filenames[i+1]) self.assertEqual(filename_1, filename_2) #Test elements filename starting with bfe_, and with underscores instead of spaces filenames = ["test 2", "test 2.py", "bfe_test 2", "bfe_test 2.py", "BFE_test 2", "BFE_TEST 2", "BFE_TEST 2.py", "BFE_TeST 2.py", "BFE_TeST 2", "BfE_TeST 2.py", "BfE_TeST 2","test_2", "test_2.py", "bfe_test_2", "bfe_test_2.py", "BFE_test_2", "BFE_TEST_2", "BFE_TEST_2.py", "BFE_TeST_2.py", "BFE_TeST_2", "BfE_TeST_2.py", "BfE_TeST_2"] for i in range(len(filenames)-2): filename_1 = bibformat_engine.resolve_format_element_filename(filenames[i]) self.assert_(filename_1 is not None) filename_2 = bibformat_engine.resolve_format_element_filename(filenames[i+1]) self.assertEqual(filename_1, filename_2) #Test non existing element non_existing_element = bibformat_engine.resolve_format_element_filename("BFE_NON_EXISTING_ELEMENT") self.assertEqual(non_existing_element, None) def test_get_format_element(self): """bibformat - format elements parsing and returned structure""" bibformat_engine.CFG_BIBFORMAT_ELEMENTS_PATH = CFG_BIBFORMAT_ELEMENTS_PATH bibformat_engine.CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH #Test loading with different kind of names, for element with spaces in name, without bfe_ element_1 = 
bibformat_engine.get_format_element("test 1", with_built_in_params=True) self.assert_(element_1 is not None) element_1_bis = bibformat_engine.get_format_element("bfe_tEst_1.py", with_built_in_params=True) self.assertEqual(element_1, element_1_bis) #Test loading with different kind of names, for element without spaces in name, wit bfe_ element_2 = bibformat_engine.get_format_element("test 2", with_built_in_params=True) self.assert_(element_2 is not None) element_2_bis = bibformat_engine.get_format_element("bfe_tEst_2.py", with_built_in_params=True) self.assertEqual(element_2, element_2_bis) #Test loading incorrect elements element_3 = bibformat_engine.get_format_element("test 3", with_built_in_params=True) self.assertEqual(element_3, None) element_4 = bibformat_engine.get_format_element("test 4", with_built_in_params=True) self.assertEqual(element_4, None) unknown_element = bibformat_engine.get_format_element("TEST_NO_ELEMENT", with_built_in_params=True) self.assertEqual(unknown_element, None) #Test element without docstring element_5 = bibformat_engine.get_format_element("test_5", with_built_in_params=True) self.assert_(element_5 is not None) self.assertEqual(element_5['attrs']['description'], '') self.assert_({'name':"param1", 'description':"(no description provided)", 'default':""} in element_5['attrs']['params'] ) self.assertEqual(element_5['attrs']['seealso'], []) #Test correct parsing: #Test type of element self.assertEqual(element_1['type'], "python") #Test name = element filename, with underscore instead of spaces, #without BFE_ and uppercase self.assertEqual(element_1['attrs']['name'], "TEST_1") #Test description parsing self.assertEqual(element_1['attrs']['description'], "Prints test") #Test @see parsing self.assertEqual(element_1['attrs']['seealso'], ["element2.py", "unknown_element.py"]) #Test @param parsing self.assert_({'name':"param1", 'description':"desc 1", 'default':""} in element_1['attrs']['params'] ) self.assert_({'name':"param2", 
'description':"desc 2", 'default':"default value"} in element_1['attrs']['params'] ) #Test non existing element non_existing_element = bibformat_engine.get_format_element("BFE_NON_EXISTING_ELEMENT") self.assertEqual(non_existing_element, None) def test_get_format_element_attrs_from_function(self): """ bibformat - correct parsing of attributes in 'format' docstring""" bibformat_engine.CFG_BIBFORMAT_ELEMENTS_PATH = CFG_BIBFORMAT_ELEMENTS_PATH bibformat_engine.CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH element_1 = bibformat_engine.get_format_element("test 1", with_built_in_params=True) function = element_1['code'] attrs = bibformat_engine.get_format_element_attrs_from_function(function, element_1['attrs']['name'], with_built_in_params=True) self.assertEqual(attrs['name'], "TEST_1") #Test description parsing self.assertEqual(attrs['description'], "Prints test") #Test @see parsing self.assertEqual(attrs['seealso'], ["element2.py", "unknown_element.py"]) def test_get_format_elements(self): """bibformat - multiple format elements parsing and returned structure""" bibformat_engine.CFG_BIBFORMAT_ELEMENTS_PATH = CFG_BIBFORMAT_ELEMENTS_PATH bibformat_engine.CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH elements = bibformat_engine.get_format_elements() self.assert_(isinstance(elements, dict)) self.assertEqual(elements['TEST_1']['attrs']['name'], "TEST_1") self.assertEqual(elements['TEST_2']['attrs']['name'], "TEST_2") self.assert_("TEST_3" not in elements.keys()) self.assert_("TEST_4" not in elements.keys()) class OutputFormatTest(unittest.TestCase): """ bibformat - tests on output formats""" def test_get_output_format(self): """ bibformat - output format parsing and returned structure """ bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH filename_1 = bibformat_engine.resolve_output_format_filename("test1") output_1 = bibformat_engine.get_output_format(filename_1, with_attributes=True) 
self.assertEqual(output_1['attrs']['names']['generic'], "") self.assert_(isinstance(output_1['attrs']['names']['ln'], dict)) self.assert_(isinstance(output_1['attrs']['names']['sn'], dict)) self.assertEqual(output_1['attrs']['code'], "TEST1") self.assert_(len(output_1['attrs']['code']) <= 6) self.assertEqual(len(output_1['rules']), 4) self.assertEqual(output_1['rules'][0]['field'], '980.a') self.assertEqual(output_1['rules'][0]['template'], 'Picture_HTML_detailed.bft') self.assertEqual(output_1['rules'][0]['value'], 'PICTURE ') self.assertEqual(output_1['rules'][1]['field'], '980.a') self.assertEqual(output_1['rules'][1]['template'], 'Article.bft') self.assertEqual(output_1['rules'][1]['value'], 'ARTICLE') self.assertEqual(output_1['rules'][2]['field'], '980__a') self.assertEqual(output_1['rules'][2]['template'], 'Thesis_detailed.bft') self.assertEqual(output_1['rules'][2]['value'], 'THESIS ') self.assertEqual(output_1['rules'][3]['field'], '980__a') self.assertEqual(output_1['rules'][3]['template'], 'Pub.bft') self.assertEqual(output_1['rules'][3]['value'], 'PUBLICATION ') filename_2 = bibformat_engine.resolve_output_format_filename("TEST2") output_2 = bibformat_engine.get_output_format(filename_2, with_attributes=True) self.assertEqual(output_2['attrs']['names']['generic'], "") self.assert_(isinstance(output_2['attrs']['names']['ln'], dict)) self.assert_(isinstance(output_2['attrs']['names']['sn'], dict)) self.assertEqual(output_2['attrs']['code'], "TEST2") self.assert_(len(output_2['attrs']['code']) <= 6) self.assertEqual(output_2['rules'], []) unknown_output = bibformat_engine.get_output_format("unknow", with_attributes=True) self.assertEqual(unknown_output, {'rules':[], 'default':"", 'attrs':{'names':{'generic':"", 'ln':{}, 'sn':{}}, 'description':'', 'code':"UNKNOW", 'content_type':""}}) def test_get_output_formats(self): """ bibformat - loading multiple output formats """ bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH outputs = 
bibformat_engine.get_output_formats(with_attributes=True) self.assert_(isinstance(outputs, dict)) self.assert_("TEST1.bfo" in outputs.keys()) self.assert_("TEST2.bfo" in outputs.keys()) self.assert_("unknow.bfo" not in outputs.keys()) #Test correct parsing output_1 = outputs["TEST1.bfo"] self.assertEqual(output_1['attrs']['names']['generic'], "") self.assert_(isinstance(output_1['attrs']['names']['ln'], dict)) self.assert_(isinstance(output_1['attrs']['names']['sn'], dict)) self.assertEqual(output_1['attrs']['code'], "TEST1") self.assert_(len(output_1['attrs']['code']) <= 6) def test_get_output_format_attrs(self): """ bibformat - correct parsing of attributes in output format""" bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH attrs= bibformat_engine.get_output_format_attrs("TEST1") self.assertEqual(attrs['names']['generic'], "") self.assert_(isinstance(attrs['names']['ln'], dict)) self.assert_(isinstance(attrs['names']['sn'], dict)) self.assertEqual(attrs['code'], "TEST1") self.assert_(len(attrs['code']) <= 6) def test_resolve_output_format(self): """ bibformat - resolving output format filename""" bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH filenames = ["test1", "test1.bfo", "TEST1", "TeST1", "TEST1.bfo", "test1"] for i in range(len(filenames)-2): filename_1 = bibformat_engine.resolve_output_format_filename(filenames[i]) self.assert_(filename_1 is not None) filename_2 = bibformat_engine.resolve_output_format_filename(filenames[i+1]) self.assertEqual(filename_1, filename_2) def test_get_fresh_output_format_filename(self): """ bibformat - getting fresh filename for output format""" bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH filename_and_name_1 = bibformat_engine.get_fresh_output_format_filename("test") self.assert_(len(filename_and_name_1) >= 2) self.assertEqual(filename_and_name_1[0], "TEST.bfo") filename_and_name_1_bis = bibformat_engine.get_fresh_output_format_filename("") 
self.assert_(len(filename_and_name_1_bis) >= 2) self.assertEqual(filename_and_name_1_bis[0], "TEST.bfo") filename_and_name_2 = bibformat_engine.get_fresh_output_format_filename("test1") self.assert_(len(filename_and_name_2) >= 2) self.assert_(filename_and_name_2[0] != "TEST1.bfo") path = bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename_and_name_2[0] self.assert_(not os.path.exists(path)) filename_and_name_3 = bibformat_engine.get_fresh_output_format_filename("test1testlong") self.assert_(len(filename_and_name_3) >= 2) self.assert_(filename_and_name_3[0] != "TEST1TESTLONG.bft") self.assert_(len(filename_and_name_3[0]) <= 6 + 1 + len(bibformat_config.CFG_BIBFORMAT_FORMAT_OUTPUT_EXTENSION)) path = bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH + os.sep + filename_and_name_3[0] self.assert_(not os.path.exists(path)) class PatternTest(unittest.TestCase): """ bibformat - tests on re patterns""" def test_pattern_lang(self): """ bibformat - correctness of pattern 'pattern_lang'""" text = '''

Here is my test text

Some wordsQuelques motsEinige Wörter garbage Here ends the middle of my test text EnglishFrançaisDeutsch Here ends my test text

''' result = bibformat_engine.pattern_lang.search(text) self.assertEqual(result.group("langs"), "Some wordsQuelques motsEinige Wörter garbage ") text = '''

Here is my test text

''' result = bibformat_engine.pattern_lang.search(text) self.assertEqual(result.group("langs"), "Some wordsQuelques motsEinige Wörter garbage ") def test_ln_pattern(self): """ bibformat - correctness of pattern 'ln_pattern'""" text = "Some wordsQuelques motsEinige Wörter garbage " result = bibformat_engine.ln_pattern.search(text) self.assertEqual(result.group(1), "en") self.assertEqual(result.group(2), "Some words") def test_pattern_format_template_name(self): """ bibformat - correctness of pattern 'pattern_format_template_name'""" text = ''' garbage a name a description on 2 lines

the content of the template

content ''' result = bibformat_engine.pattern_format_template_name.search(text) self.assertEqual(result.group('name'), "a name") def test_pattern_format_template_desc(self): """ bibformat - correctness of pattern 'pattern_format_template_desc'""" text = ''' garbage a name a description on 2 lines

the content of the template

content ''' result = bibformat_engine.pattern_format_template_desc.search(text) self.assertEqual(result.group('desc'), '''a description on 2 lines ''') def test_pattern_tag(self): """ bibformat - correctness of pattern 'pattern_tag'""" text = ''' garbage but part of content a name a description on 2 lines

the content of the template

my content is so nice! ''' result = bibformat_engine.pattern_tag.search(text) self.assertEqual(result.group('function_name'), "tiTLE") self.assertEqual(result.group('params').strip(), '''param1="value1" param2=""''') def test_pattern_function_params(self): """ bibformat - correctness of pattern 'test_pattern_function_params'""" text = ''' param1="" param2="value2" param3="value3" garbage ''' names = ["param1", "param2", "param3"] values = ["", "value2", "value3"] results = bibformat_engine.pattern_format_element_params.finditer(text) #TODO param_i = 0 for match in results: self.assertEqual(match.group('param'), names[param_i]) self.assertEqual(match.group('value'), values [param_i]) param_i += 1 def test_pattern_format_element_params(self): """ bibformat - correctness of pattern 'pattern_format_element_params'""" text = ''' a description for my element some text @param param1 desc1 @param param2 desc2 @see seethis, seethat ''' names = ["param1", "param2"] descriptions = ["desc1", "desc2"] results = bibformat_engine.pattern_format_element_params.finditer(text) #TODO param_i = 0 for match in results: self.assertEqual(match.group('name'), names[param_i]) self.assertEqual(match.group('desc'), descriptions[param_i]) param_i += 1 def test_pattern_format_element_seealso(self): """ bibformat - correctness of pattern 'pattern_format_element_seealso' """ text = ''' a description for my element some text @param param1 desc1 @param param2 desc2 @see seethis, seethat ''' result = bibformat_engine.pattern_format_element_seealso.search(text) self.assertEqual(result.group('see').strip(), 'seethis, seethat') class MiscTest(unittest.TestCase): """ bibformat - tests on various functions""" def test_parse_tag(self): """ bibformat - result of parsing tags""" tags_and_parsed_tags = ['245COc', ['245', 'C', 'O', 'c'], '245C_c', ['245', 'C', '' , 'c'], '245__c', ['245', '' , '' , 'c'], '245__$$c', ['245', '' , '' , 'c'], '245__$c', ['245', '' , '' , 'c'], '245 $c', ['245', '' , '' , 'c'], 
'245 $$c', ['245', '' , '' , 'c'], '245__.c', ['245', '' , '' , 'c'], '245 .c', ['245', '' , '' , 'c'], '245C_$c', ['245', 'C', '' , 'c'], '245CO$$c', ['245', 'C', 'O', 'c'], '245CO.c', ['245', 'C', 'O', 'c'], '245$c', ['245', '' , '' , 'c'], '245.c', ['245', '' , '' , 'c'], '245$$c', ['245', '' , '' , 'c'], '245__%', ['245', '' , '' , '%'], '245__$$%', ['245', '' , '' , '%'], '245__$%', ['245', '' , '' , '%'], '245 $%', ['245', '' , '' , '%'], '245 $$%', ['245', '' , '' , '%'], '245$%', ['245', '' , '' , '%'], '245.%', ['245', '' , '' , '%'], '245_O.%', ['245', '' , 'O', '%'], '245.%', ['245', '' , '' , '%'], '245$$%', ['245', '' , '' , '%'], '2%5$$a', ['2%5', '' , '' , 'a'], '2%%%%a', ['2%%', '%', '%', 'a'], '2%%__a', ['2%%', '' , '' , 'a'], '2%%a', ['2%%', '' , '' , 'a']] for i in range(0, len(tags_and_parsed_tags), 2): parsed_tag = bibformat_utils.parse_tag(tags_and_parsed_tags[i]) self.assertEqual(parsed_tag, tags_and_parsed_tags[i+1]) class FormatTest(unittest.TestCase): """ bibformat - generic tests on function that do the formatting. 
Main functions""" def setUp(self): # pylint: disable-msg=C0103 """ bibformat - prepare BibRecord objects""" self.xml_text_1 = ''' 33 thesis Doe1, John Doe2, John editor On the foo and bar1 On the foo and bar2 99999 ''' #rec_1 = bibrecord.create_record(self.xml_text_1) self.bfo_1 = bibformat_engine.BibFormatObject(recID=None, ln='fr', xml_record=self.xml_text_1) self.xml_text_2 = ''' 33 thesis Doe1, John Doe2, John editor On the foo and bar1 On the foo and bar2 ''' #self.rec_2 = bibrecord.create_record(xml_text_2) self.bfo_2 = bibformat_engine.BibFormatObject(recID=None, ln='fr', xml_record=self.xml_text_2) self.xml_text_3 = ''' 33 eng Doe1, John Doe2, John editor On the foo and bar1 On the foo and bar2 article ''' #self.rec_3 = bibrecord.create_record(xml_text_3) self.bfo_3 = bibformat_engine.BibFormatObject(recID=None, ln='fr', xml_record=self.xml_text_3) def test_decide_format_template(self): """ bibformat - choice made by function decide_format_template""" bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH result = bibformat_engine.decide_format_template(self.bfo_1, "test1") self.assertEqual(result, "Thesis_detailed.bft") result = bibformat_engine.decide_format_template(self.bfo_3, "test3") self.assertEqual(result, "Test3.bft") #Only default matches result = bibformat_engine.decide_format_template(self.bfo_2, "test1") self.assertEqual(result, "Default_HTML_detailed.bft") #No match at all for record result = bibformat_engine.decide_format_template(self.bfo_2, "test2") self.assertEqual(result, None) #Non existing output format result = bibformat_engine.decide_format_template(self.bfo_2, "UNKNOW") self.assertEqual(result, None) def test_format_record(self): """ bibformat - correct formatting""" bibformat_engine.CFG_BIBFORMAT_OUTPUTS_PATH = CFG_BIBFORMAT_OUTPUTS_PATH bibformat_engine.CFG_BIBFORMAT_ELEMENTS_PATH = CFG_BIBFORMAT_ELEMENTS_PATH bibformat_engine.CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH 
bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH = CFG_BIBFORMAT_TEMPLATES_PATH #use output format that has no match TEST DISABLED DURING MIGRATION #result = bibformat_engine.format_record(recID=None, of="test2", xml_record=self.xml_text_2) #self.assertEqual(result.replace("\n", ""),"") #use output format that link to unknown template result = bibformat_engine.format_record(recID=None, of="test3", xml_record=self.xml_text_2) self.assertEqual(result.replace("\n", ""),"") #Unknown output format TEST DISABLED DURING MIGRATION #result = bibformat_engine.format_record(recID=None, of="unkno", xml_record=self.xml_text_3) #self.assertEqual(result.replace("\n", ""),"") #Default formatting result = bibformat_engine.format_record(recID=None, ln='fr', of="test3", xml_record=self.xml_text_3) - self.assertEqual(result,'''

hi

this is my template\ntesttfrgarbage\n
test me!oka default valueeditor\n
test me!oka default valueeditor\n
test me!<b>ok</b>a default valueeditor\n''') - + self.assertEqual(result,'''

hi

this is my template\ntesttfrgarbage\n
test me!oka default valueeditor\n
test me!oka default valueeditor\n
test me!oka default valueeditor\n''') + def test_format_with_format_template(self): """ bibformat - correct formatting with given template""" bibformat_engine.CFG_BIBFORMAT_ELEMENTS_PATH = CFG_BIBFORMAT_ELEMENTS_PATH bibformat_engine.CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH = CFG_BIBFORMAT_ELEMENTS_IMPORT_PATH bibformat_engine.CFG_BIBFORMAT_TEMPLATES_PATH = CFG_BIBFORMAT_TEMPLATES_PATH template = bibformat_engine.get_format_template("Test3.bft") result = bibformat_engine.format_with_format_template(format_template_filename = None, bfo=self.bfo_1, verbose=0, format_template_code=template['code']) self.assert_(isinstance(result, tuple)) - self.assertEqual(result[0],'''

hi

this is my template\ntesttfrgarbage\n
test me!oka default valueeditor\n
test me!oka default valueeditor\n
test me!<b>ok</b>a default valueeditor\n99999''') + self.assertEqual(result[0],'''

hi

this is my template\ntesttfrgarbage\n
test me!oka default valueeditor\n
test me!oka default valueeditor\n
test me!oka default valueeditor\n99999''') def create_test_suite(): """Return test suite for the bibformat module""" return unittest.TestSuite((unittest.makeSuite(FormatTemplateTest,'test'), unittest.makeSuite(OutputFormatTest,'test'), unittest.makeSuite(FormatElementTest,'test'), unittest.makeSuite(PatternTest,'test'), unittest.makeSuite(MiscTest,'test'), unittest.makeSuite(FormatTest,'test'))) if __name__ == '__main__': unittest.TextTestRunner(verbosity=2).run(create_test_suite()) diff --git a/modules/bibformat/lib/elements/bfe_abstract.py b/modules/bibformat/lib/elements/bfe_abstract.py index 457033f54..8f2d2c43b 100644 --- a/modules/bibformat/lib/elements/bfe_abstract.py +++ b/modules/bibformat/lib/elements/bfe_abstract.py @@ -1,124 +1,133 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints English and French abstract. """ __revision__ = "$Id$" +import cgi from invenio import bibformat_utils def format(bfo, prefix_en, prefix_fr, suffix_en, suffix_fr, limit, extension_en="[...] ",extension_fr="[...] ", contextual="no", highlight='no', print_lang='en,fr'): - """ Prints the abstract of a record. By default prints English and French versions. 
+ """ Prints the abstract of a record in HTML. By default prints English and French versions. Printed languages can be chosen with the 'print_lang' parameter. @param prefix_en a prefix for english abstract (printed only if english abstract exists) @param prefix_fr a prefix for french abstract (printed only if french abstract exists) @param limit the maximum number of sentences of the abstract to display (for each language) @param extension_en a text printed after english abstracts longer than parameter 'limit' @param extension_fr a text printed after french abstracts longer than parameter 'limit' @param suffix_en a suffix for english abstract(printed only if english abstract exists) @param suffix_fr a suffix for french abstract(printed only if french abstract exists) @parmm contextual if 'yes' prints sentences the most relative to user search keyword (if limit < abstract) @param highlight if 'yes' highlights words from user search keyword @param print_lang the comma-separated list of languages to print. Now restricted to 'en' and 'fr' """ out = '' languages = print_lang.split(',') abstract_en = bfo.fields('520__a') abstract_en.extend(bfo.fields('520__b')) + abstract_en = [cgi.escape(val) for val in abstract_en] abstract_en = "
".join(abstract_en) abstract_fr = bfo.fields('590__a') abstract_fr.extend(bfo.fields('590__b')) + abstract_fr = [cgi.escape(val) for val in abstract_fr] abstract_fr = "
".join(abstract_fr) if contextual == 'yes' and limit != "" and \ limit.isdigit() and int(limit) > 0: context_en = bibformat_utils.get_contextual_content(abstract_en, bfo.search_pattern, max_lines=int(limit)) #FIXME add something like [...] before and after #contextual sentences when not at beginning/end of abstract #if not abstract_en.strip().startswith(context_en[0].strip()): # out += '[...]' abstract_en = "
".join(context_en) context_fr = bibformat_utils.get_contextual_content(abstract_fr, bfo.search_pattern, max_lines=int(limit)) abstract_fr = "
".join(context_fr) if len(abstract_en) > 0 and 'en' in languages: out += prefix_en if limit != "" and limit.isdigit(): print_extension = False s_abstract = abstract_en.split(".") if int(limit) < len(s_abstract): print_extension = True s_abstract = s_abstract[:int(limit)] for sentence in s_abstract: out += sentence+ "." if print_extension: out += " "+extension_en else: out += abstract_en out += suffix_en if len(abstract_fr) > 0 and 'fr' in languages: out += prefix_fr if limit != "" and limit.isdigit(): print_extension = False s_abstract = abstract_fr.split(".") if int(limit) < len(s_abstract): print_extension = True s_abstract = s_abstract[:int(limit)] for sentence in s_abstract: out += sentence + "." if print_extension: out += " "+extension_fr else: out += abstract_fr out += suffix_fr if highlight == 'yes': out = bibformat_utils.highlight(out, bfo.search_pattern) return out +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_affiliation.py b/modules/bibformat/lib/elements/bfe_affiliation.py index 3b62855da..8e7de99a6 100644 --- a/modules/bibformat/lib/elements/bfe_affiliation.py +++ b/modules/bibformat/lib/elements/bfe_affiliation.py @@ -1,34 +1,43 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints affiliation """ __revision__ = "$Id$" +import cgi + def format(bfo): """ HTML Affiliation display """ affiliations = bfo.fields('909C1u') if len(affiliations) > 0: out = "
" for affiliation in affiliations: - out += affiliation +" " + out += cgi.escape(affiliation) +" " return out + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_authors.py b/modules/bibformat/lib/elements/bfe_authors.py index b25e3b3ed..f7ab74102 100644 --- a/modules/bibformat/lib/elements/bfe_authors.py +++ b/modules/bibformat/lib/elements/bfe_authors.py @@ -1,100 +1,106 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints authors """ __revision__ = "$Id$" def format(bfo, limit, separator=' ; ', extension='[...]', print_links="yes", interactive="no", highlight="no"): """ Prints the list of authors of a record. @param limit the maximum number of authors to display @param separator the separator between authors. 
@param extension a text printed if more authors than 'limit' exist @param print_links if yes, prints the authors as HTML link to their publications @param interactive if yes, enable user to show/hide authors when there are too many (html + javascript) @param highlight highlights authors corresponding to search query if set to 'yes' """ from urllib import quote from cgi import escape from invenio.config import weburl from invenio.messages import gettext_set_language _ = gettext_set_language(bfo.lang) # load the right message language authors = [] authors_1 = bfo.fields('100__a') authors_2 = bfo.fields('700__a') authors_3 = bfo.fields('270__p') authors.extend(authors_1) authors.extend(authors_2) authors.extend(authors_3) nb_authors = len(authors) if highlight == 'yes': from invenio import bibformat_utils authors = [bibformat_utils.highlight(x, bfo.search_pattern) for x in authors] if print_links.lower() == "yes": authors = map(lambda x: ''+escape(x)+'', authors) if limit.isdigit() and nb_authors > int(limit) and interactive != "yes": return separator.join(authors[:int(limit)]) + extension elif limit.isdigit() and nb_authors > int(limit) and interactive == "yes": out = ''' '''%{'show_less':_("Hide"), 'show_more':_("Show all %i authors") % nb_authors, 'extension':extension} out += '' out += separator.join(authors[:int(limit)]) out += ''+separator+separator.join(authors[int(limit):])+'' out += ' ' out += ' ' out += '' return out elif nb_authors > 0: return separator.join(authors) +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_bfx_engine.py b/modules/bibformat/lib/elements/bfe_bfx_engine.py index e2a0472d3..39c51ead4 100644 --- a/modules/bibformat/lib/elements/bfe_bfx_engine.py +++ b/modules/bibformat/lib/elements/bfe_bfx_engine.py @@ -1,40 +1,47 @@ ## $Id$ ## ## This file is part of CDS Invenio. 
## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Entry point for BibFormat XML engine """ __revision__ = "$Id$" from cStringIO import StringIO from invenio.bibformat_bfx_engine import format_with_bfx def format(bfo, template='DC'): """ An entry point to the BibFormat BFX engine, when used as an element. Formats the record according to a template. For further details, please read the documentation. @param template the name of the template file without the bfx extension """ output = "" recIDs = [bfo.recID] outFile = StringIO() # a virtual file-like object to write in format_with_bfx(recIDs, outFile, template) output = outFile.getvalue() return output + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_cited_by.py b/modules/bibformat/lib/elements/bfe_cited_by.py index b9ffe8551..74dcd087c 100644 --- a/modules/bibformat/lib/elements/bfe_cited_by.py +++ b/modules/bibformat/lib/elements/bfe_cited_by.py @@ -1,47 +1,54 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. 
## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints reference to documents citing this one """ __revision__ = "$Id$" def format(bfo, separator='; '): """ Prints a list of records citing this record @param separator a separator between citations """ from urllib import quote from invenio.config import weburl primary_report_numbers = bfo.fields('037__a') additional_report_numbers = bfo.fields('088__a') primary_citations = map(lambda x: \ '' \ + x + '', primary_report_numbers) additional_citations = map(lambda x: \ '' \ + x + '', additional_report_numbers) citations = primary_citations citations.extend(additional_citations) return separator.join(citations) + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_edit_record.py b/modules/bibformat/lib/elements/bfe_edit_record.py index 4d52ac1cd..bde22c7fa 100644 --- a/modules/bibformat/lib/elements/bfe_edit_record.py +++ b/modules/bibformat/lib/elements/bfe_edit_record.py @@ -1,46 +1,53 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. 
## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints a link to BibEdit """ __revision__ = "$Id$" def format(bfo, style): """ Prints a link to BibEdit, if authorization is granted @param style the CSS style to be applied to the link. """ from invenio.config import weburl from invenio.access_control_engine import acc_authorize_action out = "" uid = bfo.uid if uid is not None: (auth_code, auth_message) = acc_authorize_action(uid,'runbibedit') if auth_code == 0: print_style = '' if style != '': print_style = 'style="' + style + '"' out += 'Edit This Record' return out + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_external_publications.py b/modules/bibformat/lib/elements/bfe_external_publications.py index 42c2bd76b..5cfedd181 100644 --- a/modules/bibformat/lib/elements/bfe_external_publications.py +++ b/modules/bibformat/lib/elements/bfe_external_publications.py @@ -1,30 +1,37 @@ ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. 
## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints links to external publications """ __revision__ = "$Id$" def format(bfo, separator='
'): """ Prints list of links to external publications. """ publications = bfo.fields('909C4') out = map(lambda x: ''+x['p']+'', publications) return separator.join(out) + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_fulltext.py b/modules/bibformat/lib/elements/bfe_fulltext.py index 474913431..306e665bf 100644 --- a/modules/bibformat/lib/elements/bfe_fulltext.py +++ b/modules/bibformat/lib/elements/bfe_fulltext.py @@ -1,42 +1,49 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints a links to fulltext """ __revision__ = "$Id$" from cgi import escape def format(bfo, style, separator='; '): """ This is the default format for formatting full-text reference. @param separator the separator between urls. @param style CSS class of the link """ urls_u = bfo.fields("8564_u") if style != "": style = 'class="'+style+'"' # Build urls list. # Escape special chars for tag value. 
urls = [''+escape(url)+'' for url in urls_u] return separator.join(urls) + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_keywords.py b/modules/bibformat/lib/elements/bfe_keywords.py index 263d79935..c336c8fb8 100644 --- a/modules/bibformat/lib/elements/bfe_keywords.py +++ b/modules/bibformat/lib/elements/bfe_keywords.py @@ -1,46 +1,52 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints keywords """ __revision__ = "$Id$" def format(bfo, keyword_prefix, keyword_suffix, separator=' ; ', link='yes'): """ Display keywords of the record. 
@param keyword_prefix a prefix before each keyword @param keyword_suffix a suffix after each keyword @param separator a separator between keywords @param link links the keywords if 'yes' (HTML links) """ from urllib import quote from invenio.config import weburl keywords = bfo.fields('6531_a') if len(keywords) > 0: if link == 'yes': keywords = map(lambda x: ''+x+'', keywords) keywords = map(lambda x: keyword_prefix+x+keyword_suffix, keywords) return separator.join(keywords) +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_photo_resources.py b/modules/bibformat/lib/elements/bfe_photo_resources.py index 58ed8114a..dfd624915 100644 --- a/modules/bibformat/lib/elements/bfe_photo_resources.py +++ b/modules/bibformat/lib/elements/bfe_photo_resources.py @@ -1,42 +1,49 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints HTML picture and links to resources """ __revision__ = "$Id$" def format(bfo): """ Prints html image and link to photo resources. 
""" resources = bfo.fields("8564_") out = "" for resource in resources: if resource.get("x", "") == "icon" and resource.get("u", "") == "": out += '

' if resource.get("x", "") == "1": out += '
High resolution: '+ resource.get("q", "") +"" out += "
© CERN Geneva" out += '
'+ bfo.field("8564_z") + "" return out + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_photo_resources_brief.py b/modules/bibformat/lib/elements/bfe_photo_resources_brief.py index 9cd3a98c3..e5c7b362e 100644 --- a/modules/bibformat/lib/elements/bfe_photo_resources_brief.py +++ b/modules/bibformat/lib/elements/bfe_photo_resources_brief.py @@ -1,38 +1,45 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints brief HTML picture and links to resources """ __revision__ = "$Id$" def format(bfo): """ Prints html image and link to photo resources. """ from invenio.config import weburl resources = bfo.fields("8564_") out = "" for resource in resources: if resource.get("x", "") == "icon" and resource.get("u", "") == "": out += '' return out + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. 
+ """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_publi_info.py b/modules/bibformat/lib/elements/bfe_publi_info.py index 1762ea933..b09e551e8 100644 --- a/modules/bibformat/lib/elements/bfe_publi_info.py +++ b/modules/bibformat/lib/elements/bfe_publi_info.py @@ -1,74 +1,91 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints publcation information and link to ejournal """ __revision__ = "$Id$" +from urllib import quote +import cgi + def format(bfo): """ Displays inline publication information with html link to ejournal (when available). 
""" - from urllib import quote + out = '' publication_info = bfo.field('909C4') if publication_info == "": return "" - journal = bfo.kb('ejournals', publication_info.get('p')) + journal_source = publication_info.get('p') + journal = bfo.kb('ejournals', journal_source) volume = publication_info.get('v') year = publication_info.get('y') number = publication_info.get('n') pages = publication_info.get('c') if journal != '' and volume is not None: + + journal = cgi.escape(journal) + volume = cgi.escape(volume) + year = cgi.escape(year) + number = cgi.escape(number) + pages = cgi.escape(pages) + out += '%(journal)s :%(volume)s %(year)s %(page)s' % {'journal': journal, 'volume': volume, 'year': year, 'page': pages} else: - out += publication_info.get('p') + ': ' + out += journal_source + ': ' if volume is not None: out += volume if year is not None: out += ' (' + year + ') ' if number is not None: out += 'no. ' + number + ', ' if pages is not None: out += 'pp. ' + pages return out +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_references.py b/modules/bibformat/lib/elements/bfe_references.py index cfb710235..b1d08d948 100644 --- a/modules/bibformat/lib/elements/bfe_references.py +++ b/modules/bibformat/lib/elements/bfe_references.py @@ -1,63 +1,70 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints references """ __revision__ = "$Id$" def format(bfo, reference_prefix, reference_suffix): """ Prints the references of this record @param reference_prefix a prefix displayed before each reference @param reference_suffix a suffix displayed after each reference """ from invenio.config import weburl references = bfo.fields("999C5") out = "" for reference in references: if reference_prefix is not None: out += reference_prefix if reference.has_key('o'): out += "
  • "+ reference['o']+ " " if reference.has_key('m'): out += ""+ reference['m']+ " " if reference.has_key('r'): out += ' ['+ reference['r']+ "]
    " if reference.has_key('t'): ejournal = bfo.kb("ejournals", reference.get('t', "")) if ejournal != "": out += ' ' out += reference['t']+": "+reference.get('v', "")+" ("+reference.get('y', "")+") " out += reference.get('p', "")+"
    " else: out += " "+reference['t']+ reference.get('v', "")+ reference.get('y',"")+ reference.get('p',"")+ "
    " if reference_suffix is not None: out += reference_suffix return out + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_title.py b/modules/bibformat/lib/elements/bfe_title.py index 00577901e..62ed46403 100644 --- a/modules/bibformat/lib/elements/bfe_title.py +++ b/modules/bibformat/lib/elements/bfe_title.py @@ -1,70 +1,79 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints titles """ __revision__ = "$Id$" +import cgi + def format(bfo, separator=" ", highlight='no'): """ Prints the titles of a record. 
@param separator separator between the different titles @param highlight highlights the words corresponding to search query if set to 'yes' """ titles = [] title = bfo.field('245__a') title_remainder = bfo.field('245__b') if len(title) > 0: titles.append( title + title_remainder ) title = bfo.field('0248_a') if len(title) > 0: titles.append( title ) title = bfo.field('246__a') if len(title) > 0: titles.append( title ) title = bfo.field('246_1a') if len(title) > 0: titles.append( title ) if len(titles) > 0: #Display 'Conference' title only if other titles were not found title = bfo.field('111__a') if len(title) > 0: titles.append( title ) - + + titles = [cgi.escape(x) for x in titles] + if highlight == 'yes': from invenio import bibformat_utils titles = [bibformat_utils.highlight(x, bfo.search_pattern) for x in titles] return separator.join(titles) - +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_topbanner.py b/modules/bibformat/lib/elements/bfe_topbanner.py index b14da77c8..5318ca3ed 100644 --- a/modules/bibformat/lib/elements/bfe_topbanner.py +++ b/modules/bibformat/lib/elements/bfe_topbanner.py @@ -1,55 +1,62 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints HTML topbanner with categorz, rep. number, etc. """ __revision__ = "$Id$" def format(bfo): """ HTML top page banner containing category, rep. number, etc """ collection_indicator = bfo.kb("dbcollid2coll", bfo.field("980__a")) subject = bfo.field("65017a") subject_2 = bfo.field("65027a") additional_report_numbers = bfo.fields("088__a") source_of_aquisition = bfo.field("037__a") out = '' out += '''" for report_number in additional_report_numbers: out += "" if len(source_of_aquisition) > 0: out += '" out += "
    %s ''' % collection_indicator if subject != "XX": out += " / "+ subject out += subject_2 out += "" +report_number +" '+ source_of_aquisition + "

    " return out + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_url.py b/modules/bibformat/lib/elements/bfe_url.py index 0dfafae59..cc7430a4a 100644 --- a/modules/bibformat/lib/elements/bfe_url.py +++ b/modules/bibformat/lib/elements/bfe_url.py @@ -1,37 +1,44 @@ # -*- coding: utf-8 -*- ## ## $Id$ ## ## This file is part of CDS Invenio. ## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN. ## ## CDS Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## CDS Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDS Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """BibFormat element - Prints full-text URLs """ __revision__ = "$Id$" def format(bfo, style, separator='; '): """ This is the default format for formatting full-text URLs. @param separator the separator between urls. @param style CSS class of the link """ urls_u = bfo.fields("8564_u") if style != "": style = 'class="'+style+'"' urls = map(lambda x: ''+x+'', urls_u) return separator.join(urls) + +def escape_values(bfo): + """ + Called by BibFormat in order to check if output of this element + should be escaped. + """ + return 0