bibformat.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Jun 23, 13:11

bibformat.py
View Options

	# -- coding: utf-8 --
	##
	## $Id$
	##
	## This file is part of CDS Invenio.
	## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
	##
	## CDS Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## CDS Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	"""
	Format records using specified format.

	API functions: format_record, format_records, create_excel,
	get_output_format_content_type

	Used to wrap the BibFormat engine and associated functions. This is
	also where special formatting of multiple records (that the engine
	does not handle, as it works on a single record basis) should be put,
	with name create_*.

	SEE: bibformat_utils.py

	FIXME: currently copies record_exists() code from search engine. Refactor later.
	"""

	__revision__ = "$Id$"

	import zlib

	from invenio import bibformat_dblayer
	from invenio import bibformat_engine
	from invenio import bibformat_utils
	from invenio.errorlib import register_exception
	from invenio.config import \
	CFG_SITE_LANG, \
	CFG_PATH_PHP
	from invenio.bibformat_config import \
	CFG_BIBFORMAT_USE_OLD_BIBFORMAT, \
	CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT
	try:
	import invenio.template
	websearch_templates = invenio.template.load('websearch')
	except:
	pass
	import getopt
	import sys

	# Functions to format a single record
	##

	def format_record(recID, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None,
	xml_record=None, user_info=None, on_the_fly=False):
	"""
	Formats a record given output format.

	Returns a formatted version of the record in the specified
	language, search pattern, and with the specified output format.
	The function will define which format template must be applied.

	The record to be formatted can be specified with its ID (with
	'recID' parameter) or given as XML representation(with
	'xml_record' parameter). If both are specified 'recID' is ignored.

	'user_info' allows to grant access to some functionalities on a
	page depending on the user's priviledges. The 'user_info' object
	makes sense only in the case of on-the-fly formatting. 'user_info'
	is the same object as the one returned by
	'webuser.collect_user_info(req)'

	@param recID the ID of record to format
	@param of an output format code (or short identifier for the output format)
	@param ln the language to use to format the record
	@param verbose the level of verbosity from 0 to 9 (O: silent,
	5: errors,
	7: errors and warnings, stop if error in format elements
	9: errors and warnings, stop if error (debug mode ))
	@param search_pattern list of strings representing the user request in web interface
	@param xml_record an xml string represention of the record to format
	@param user_info the information of the user who will view the formatted page (if applicable)
	@param on_the_fly if False, try to return an already preformatted version of the record in the database
	@return formatted record
	"""
	if search_pattern is None:
	search_pattern = []

	out = ""
	if verbose == 9:
	out += """\n<span class="quicknote">
	Formatting record %i with output format %s.
	</span>""" % (recID, of)
	############### FIXME: REMOVE WHEN MIGRATION IS DONE ###############
	if CFG_BIBFORMAT_USE_OLD_BIBFORMAT and CFG_PATH_PHP:
	return bibformat_engine.call_old_bibformat(recID, format=of, on_the_fly=on_the_fly)
	############################# END ##################################

	if not on_the_fly and \
	(ln == CFG_SITE_LANG or \
	of.lower() == 'xm' or \
	CFG_BIBFORMAT_USE_OLD_BIBFORMAT or \
	(CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT == False and of.lower() == 'hb')):
	# Try to fetch preformatted record
	# Only possible for records formatted in CFG_SITE_LANG
	# language (other are never stored), or of='xm' which does not
	# depend on language.
	# Also, when formatting in HB, and when
	# CFG_BIBFORMAT_ENABLE_I18N_BRIEF_FORMAT is set to False,
	# ignore other languages and fetch the preformatted output.
	res = bibformat_dblayer.get_preformatted_record(recID, of)
	if res is not None:
	# record 'recID' is formatted in 'of', so return it
	if verbose == 9:
	last_updated = bibformat_dblayer.get_preformatted_record_date(recID, of)
	out += """\n<br/><span class="quicknote">
	Found preformatted output for record %i (cache updated on %s).
	</span><br/>""" % (recID, last_updated)
	out += res
	return out
	else:
	if verbose == 9:
	out += """\n<br/><span class="quicknote">
	No preformatted output found for record %s.
	</span>"""% recID


	# Live formatting of records in all other cases
	if verbose == 9:
	out += """\n<br/><span class="quicknote">
	Formatting record %i on-the-fly.
	</span>""" % recID

	try:
	out += bibformat_engine.format_record(recID=recID,
	of=of,
	ln=ln,
	verbose=verbose,
	search_pattern=search_pattern,
	xml_record=xml_record,
	user_info=user_info)
	return out
	except Exception, e:
	register_exception(prefix="An error occured while formatting record %i in %s" % \
	(recID, of),
	alert_admin=True)
	#Failsafe execution mode
	if verbose == 9:
	out += """\n<br/><span class="quicknote">
	An error occured while formatting record %i. (%s)
	</span>""" % (recID, str(e))
	if of.lower() == 'hd':
	if verbose == 9:
	out += """\n<br/><span class="quicknote">
	Formatting record %i with websearch_templates.tmpl_print_record_detailed.
	</span><br/>""" % recID
	return out + websearch_templates.tmpl_print_record_detailed(
	ln = ln,
	recID = recID,
	)
	if verbose == 9:
	out += """\n<br/><span class="quicknote">
	Formatting record %i with websearch_templates.tmpl_print_record_brief.
	</span><br/>""" % recID
	return out + websearch_templates.tmpl_print_record_brief(ln = ln,
	recID = recID,
	)


	def record_get_xml(recID, format='xm', decompress=zlib.decompress):
	    """
	    Return the XML string of the record identified by recID.

	    This is a thin wrapper: all arguments are forwarded unchanged to
	    bibformat_utils.record_get_xml(), whose result is returned as is.

	    Documented flavours of XML selectable through 'format':
	    - 'xm' for standard XML
	    - 'marcxml' for MARC XML
	    - 'oai_dc' for OAI Dublin Core
	    - 'xd' for XML Dublin Core

	    The wrapped function is documented as building the XML directly
	    from the database (bypassing the standard formatting process) and
	    as returning an empty string when the record does not exist.

	    @param recID the id of the record to retrieve
	    @param format the flavour of XML to produce (see list above)
	    @param decompress the function handed to bibformat_utils to decompress stored data (zlib.decompress by default)
	    @return the xml string of the record
	    """
	    xml_string = bibformat_utils.record_get_xml(recID=recID,
	                                                format=format,
	                                                decompress=decompress)
	    return xml_string


	# Helper functions to do complex formatting of multiple records
	#
	# You should not modify format_records when adding a complex
	# formatting of multiple records, but add a create_* method
	# that relies on format_records to do the formatting.
	##

	def format_records(recIDs, of, ln=CFG_SITE_LANG, verbose=0, search_pattern=None,
	                   xml_records=None, user_info=None, record_prefix=None,
	                   record_separator=None, record_suffix=None, prologue="",
	                   epilogue="", req=None, on_the_fly=False):
	    """
	    Returns the concatenation of the formatted records given by a list
	    of record IDs or a list of records as xml. Adds a prefix before
	    each record, a suffix after each record, plus a separator between
	    records.

	    Also add optional prologue and epilogue to the complete formatted
	    list.

	    You can either specify a list of record IDs to format, or a list
	    of xml records, but not both (if both are specified recIDs is
	    ignored).

	    'record_separator' is either a string used between all formatted
	    records, or a function that returns such a string. The function
	    must take an integer as unique parameter, which is the index in
	    recIDs (or xml_records) of the record that has just been
	    formatted. For example separator(i) must return the separator
	    between recID[i] and recID[i+1]. No separator is emitted after the
	    last record. The same string-or-function rule applies to
	    'record_prefix' and 'record_suffix'.

	    'req' is an optional parameter on which the result of the function
	    are printed lively (prints records after records) if it is given.
	    Note that you should set 'req' content-type by yourself, and send
	    http header before calling this function as it will not do it.

	    This function takes the same parameters as 'format_record' except for:
	    @param recIDs a list of record IDs
	    @param xml_records a list of xml string representations of the records to format
	    @param record_prefix a string, or a function of the record index returning a string, printed before each record
	    @param record_separator a string, or a function of the record index returning a string, printed between records
	    @param record_suffix a string, or a function of the record index returning a string, printed after each record
	    @param prologue a string printed before all formatted records
	    @param epilogue a string printed after all formatted records
	    @param req an optional request object where to print records
	    @param on_the_fly if False, try to return an already preformatted version of the record in the database
	    @return prologue + all formatted records (with decorations) + epilogue, as one string
	    """
	    if req is not None:
	        req.write(prologue)

	    # Fill one of the lists with Nones so both can be walked in parallel
	    if xml_records is not None:
	        recIDs = [None] * len(xml_records)
	    else:
	        xml_records = [None] * len(recIDs)

	    # Pieces of output, joined once at the end
	    pieces = []

	    def emit(text):
	        """Record a piece of output and stream it to 'req' if given."""
	        pieces.append(text)
	        if req is not None:
	            req.write(text)

	    def decoration_text(decoration, index):
	        """Resolve a prefix/suffix/separator to its text for record 'index'."""
	        # Dispatch on callability rather than on isinstance(.., str):
	        # a unicode decoration must be used as text, not called.
	        if callable(decoration):
	            return decoration(index)
	        return decoration

	    last_index = len(recIDs) - 1
	    for i, (recID, xml_record) in enumerate(zip(recIDs, xml_records)):
	        # Print prefix
	        if record_prefix is not None:
	            emit(decoration_text(record_prefix, i))

	        # Print formatted record
	        emit(format_record(recID, of, ln, verbose,
	                           search_pattern, xml_record,
	                           user_info, on_the_fly))

	        # Print suffix
	        if record_suffix is not None:
	            emit(decoration_text(record_suffix, i))

	        # Print separator if needed (never after the last record)
	        if record_separator is not None and i != last_index:
	            emit(decoration_text(record_separator, i))

	    if req is not None:
	        req.write(epilogue)

	    return prologue + ''.join(pieces) + epilogue

	def create_excel(recIDs, req=None, ln=CFG_SITE_LANG):
	    """
	    Returns an Excel readable format containing the given recIDs.
	    If 'req' is given, also prints the output in 'req' while individual
	    records are being formatted.

	    This method shows how to create a custom formatting of multiple
	    records.
	    The excel format is a basic HTML table that most spreadsheets
	    applications can parse.

	    When 'req' is given, its content type is set to the content type
	    of the 'excel' output format, a Content-Disposition header naming
	    the file 'results.xls' is added, and the HTTP header is sent
	    before any record is written.

	    @param recIDs a list of record IDs
	    @param req an optional request object where to print the output
	    @param ln the language to use to format the records
	    @return a string in Excel format
	    """
	    # NOTE(review): earlier revisions also built a styled header row
	    # (Title, Authors, Addresses, Affiliation, Date, Publisher, Place,
	    # Abstract, Keywords, Notes) here but never emitted it; the unused
	    # computation was dropped. Confirm whether the 'excel' output
	    # format is expected to provide the column headers itself.
	    prologue = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><table>'
	    footer = '</table>'

	    # Apply content_type and send the HTTP header before streaming
	    if req is not None:
	        req.content_type = get_output_format_content_type('excel')
	        req.headers_out["Content-Disposition"] = "inline; filename=%s" % 'results.xls'
	        req.send_http_header()

	    # Format the records, honouring the language requested by the
	    # caller (it used to be silently replaced by CFG_SITE_LANG).
	    excel_formatted_records = format_records(recIDs, 'excel', ln=ln,
	                                             record_separator='\n',
	                                             prologue=prologue,
	                                             epilogue=footer,
	                                             req=req)

	    return excel_formatted_records

	# Utility functions
	##

	def get_output_format_content_type(of):
	    """
	    Returns the content type (eg. 'text/html' or 'application/ms-excel')
	    of the given output format.

	    The value is looked up through bibformat_dblayer; when that lookup
	    yields an empty string, 'text/html' is returned instead.

	    @param of the code of output format for which we want to get the content type
	    @return the content type of the output format
	    """
	    registered_type = bibformat_dblayer.get_output_format_content_type(of)

	    # Anything other than an empty string is passed through untouched
	    if registered_type != '':
	        return registered_type

	    return 'text/html'

	def usage(exitcode=1, msg=""):
	"""Prints usage info."""
	if msg:
	sys.stderr.write("Error: %s.\n" % msg)
	print """BibFormat: outputs the result of the formatting of a record.

	Usage: bibformat required [options]
	Examples:
	$ bibformat -i 10 -o HB
	$ bibformat -i 10,11,13 -o HB
	$ bibformat -i 10:13
	$ bibformat -i 10 -o HB -v 9

	Required:
	-i, --id=ID[ID2,ID3:ID5] ID (or range of IDs) of the record(s) to be formatted.

	Options:
	-o, --output=CODE short code of the output format used for formatting (default HB).
	-l, --lang=LN language used for formatting.
	-y, --onthefly on-the-fly formatting, avoiding caches created by BibReformat.

	General options:
	-h, --help print this help and exit
	-v, --verbose=LEVEL verbose level (from 0 to 9, default 0)
	-V --version print the script version
	"""
	sys.exit(exitcode)

	def main():
	"""main entry point for biformat via command line"""

	options = {} # will hold command-line options
	options["verbose"] = 0
	options["onthefly"] = False
	options["lang"] = CFG_SITE_LANG
	options["output"] = "HB"
	options["recID"] = None

	try:
	opts, args = getopt.getopt(sys.argv[1:],
	"hVv:yl:i:o:",
	["help",
	"version",
	"verbose=",
	"onthefly",
	"lang=",
	"id=",
	"output="])
	except getopt.GetoptError, err:
	usage(1, err)
	pass
	try:
	for opt in opts:
	if opt[0] in ["-h", "--help"]:
	usage(0)
	elif opt[0] in ["-V", "--version"]:
	print __revision__
	sys.exit(0)
	elif opt[0] in ["-v", "--verbose"]:
	options["verbose"] = int(opt[1])
	elif opt[0] in ["-y", "--onthefly"]:
	options["onthefly"] = True
	elif opt[0] in ["-l", "--lang"]:
	options["lang"] = opt[1]
	elif opt[0] in ["-i", "--id"]:
	recIDs = []
	for recID in opt[1].split(','):
	if ":" in recID:
	start = int(recID.split(':')[0])
	end = int(recID.split(':')[1])
	recIDs.extend(range(start, end))
	else:
	recIDs.append(int(recID))
	options["recID"] = recIDs
	elif opt[0] in ["-o", "--output"]:
	options["output"] = opt[1]

	if options["recID"] == None:
	usage(1, "-i argument is needed")
	except StandardError, e:
	usage(e)



	print format_records(recIDs=options["recID"],
	of=options["output"],
	ln=options["lang"],
	verbose=options["verbose"],
	on_the_fly=options["onthefly"])

	return

	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	main()

bibformat.pyNo OneTemporaryActions

File Metadata

bibformat.pyView Options

Event Timeline

bibformat.py
No OneTemporary
Actions

bibformat.py
View Options