bibrecord.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Mon, Sep 30, 16:23

bibrecord.py
View Options

	# -- coding: utf-8 --
	##
	## $Id$
	##
	## This file is part of CDS Invenio.
	## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN.
	##
	## CDS Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## CDS Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	"""
	BibRecord - XML MARC processing library for CDS Invenio.

	For API, see create_record(), record_get_field_instances() and friends
	in the source code of this file in the section entitled INTERFACE.

	Note: Does not access the database, the input is MARCXML only.
	"""

	__revision__ = "$Id$"

	### IMPORT INTERESTING MODULES AND XML PARSERS

	import string
	import re
	try:
	import psyco
	psycho_available = 1
	except ImportError:
	psycho_available = 0

	from invenio.bibrecord_config import CFG_MARC21_DTD, \
	CFG_BIBRECORD_WARNING_MSGS, \
	CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL, \
	CFG_BIBRECORD_DEFAULT_CORRECT, \
	CFG_BIBRECORD_PARSERS_AVAILABLE

	# find out about the best usable parser:
	err = []
	parser = -1
	if 2 in CFG_BIBRECORD_PARSERS_AVAILABLE:
	try:
	import pyRXP
	parser = 2
	def warnCB(s):
	""" function used to treat the PyRXP parser warnings"""
	global err
	err.append((0, 'Parse warning:\n'+s))
	except ImportError:
	pass
	if parser == -1 and \
	1 in CFG_BIBRECORD_PARSERS_AVAILABLE:
	try:
	from Ft.Xml.Domlette import NonvalidatingReader
	parser = 1
	except ImportError:
	pass
	if parser == -1 and \
	0 in CFG_BIBRECORD_PARSERS_AVAILABLE:
	try:
	from xml.dom.minidom import parseString
	parser = 0
	except ImportError:
	pass

	### INTERFACE / VISIBLE FUNCTIONS

	def create_records(xmltext,
	verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
	correct=CFG_BIBRECORD_DEFAULT_CORRECT):
	"""
	Create list of record from XMLTEXT. Return a list of objects
	initiated by create_record() function; please see that function's
	docstring.
	"""
	global parser
	err = []

	if parser == -1:
	err.append((6, "import error"))
	else:
	pat = r"<record.?>.?</record>"
	p = re.compile(pat, re.DOTALL) # DOTALL - to ignore whitespaces
	alist = p.findall(xmltext)

	listofrec = map((lambda x:create_record(x, verbose, correct)),
	alist)
	return listofrec
	return []

	# Record :: {tag : [Field]}
	# Field :: (Subfields,ind1,ind2,value)
	# Subfields :: [(code,value)]

	def create_record(xmltext,
	verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
	correct=CFG_BIBRECORD_DEFAULT_CORRECT):
	"""
	Create a record object from XMLTEXT and return it.

	Uses pyRXP if installed else uses 4Suite domlette or xml.dom.minidom.

	The returned object is a tuple (record, status_code, list_of_errors), where
	status_code is 0 when there are errors, 1 when no errors.

	The return record structure is as follows:
	Record := {tag : [Field]}
	Field := (Subfields, ind1, ind2, value)
	Subfields := [(code, value)]

	For example:
	______
	\|record\|
	------
	__________________________\|____________________________________________
	\|record['001'] \|record['909'] \|record['520'] \|
	\| \| \| \|
	[list of fields] [list of fields] [list of fields] ...
	\| ______\|______________ \|
	\|[0] \|[0] \|[1] \| \|[0]
	___\|_____ _____\|___ ___\|_____ ... ____\|____
	\|Field 001\| \|Field 909\| \|Field 909\| \|Field 520\|
	--------- --------- --------- ---------
	\| __________________\|______________ \| \|
	... \|[0] \|[1] \|[2] \| ... ...
	\| \| \| \|
	[list of subfields] 'C' '4'
	___\|_______________________________________________
	\| \| \|
	('a', 'a value') ('b', 'value for subfield b') ('a', 'another value for another a')

	@param xmltext an XML string representation of the record to create
	@param verbose the level of verbosity: 0(silent) 1-2 (warnings) 3(strict:stop when errors)
	@param correct 1 to enable correction on XML. Else 0.
	@return a tuple (record, status_code, list_of_errors), where status_code is 0 where there are errors, 1 when no errors
	"""
	global parser

	try:
	if parser == 2:
	## the following is because of DTD validation
	t = """<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE collection SYSTEM "file://%s">
	<collection>\n""" % CFG_MARC21_DTD
	t = "%s%s" % (t, xmltext)
	t = "%s</collection>" % t
	xmltext = t
	(rec, er) = create_record_RXP(xmltext, verbose, correct)
	elif parser == 1:
	(rec, er) = create_record_4suite(xmltext, verbose, correct)
	elif parser == 0:
	(rec, er) = create_record_minidom(xmltext, verbose, correct)
	else:
	(rec, er) = (None, "ERROR: No usable XML parsers found.")
	errs = warnings(er)
	except Exception, e:
	print e
	errs = warnings(concat(err))
	return (None, 0, errs)

	if errs == []:
	return (rec, 1, errs)
	else:
	return (rec, 0, errs)

	def record_get_field_instances(rec, tag="", ind1="", ind2=""):
	"""
	Returns the list of field instances for the specified tag and indicators
	of the record (rec).

	Returns empty list if not found.
	If tag is empty string, returns all fields

	Parameters (tag, ind1, ind2) can contain wildcard %.

	@param rec a record structure as returned by create_record()
	@param tag a 3 characters long string
	@param ind1 a 1 character long string
	@param ind2 a 1 character long string
	@param code a 1 character long string
	@return a list of field tuples (Subfields, ind1, ind2, value) where subfields is list of (code, value)
	"""
	out = []
	if tag:
	if '%' in tag:
	#Wildcard in tag. Check all possible
	for field_tag in rec.keys():
	if tag_matches_pattern(field_tag, tag):
	for possible_field_instance in rec[field_tag]:
	if (ind1 == '%' or possible_field_instance[1] == ind1) and \
	(ind2 == '%' or possible_field_instance[2] == ind2):
	out.append(possible_field_instance)
	else:
	#Completely defined tag. Use dict
	if record_has_field(rec, tag):
	for possible_field_instance in rec[tag]:
	if (ind1 == '%' or possible_field_instance[1] == ind1) and \
	(ind2 == '%' or possible_field_instance[2] == ind2):
	out.append(possible_field_instance)
	else:
	return rec.items()
	return out

	def record_has_field(rec, tag):
	"""checks whether record 'rec' contains tag 'tag'"""
	return rec.has_key(tag)

	def record_add_field(rec, tag, ind1="", ind2="", controlfield_value="", datafield_subfield_code_value_tuples=[]):
	"""
	Add a new field TAG to record REC with the following values:

	In case of creating a controlfield, only one argument matters:

	controlfield_value - value of the control field, in case
	this field is a controlfield.

	In case of creating a datafield, only these arguments matter:

	ind1, ind2 - indicators of the datafield

	datafield_subfield_code_value_tuples - list of subfield code and
	value tuples, e.g.: [('a', 'Ellis, J'), ('e', 'editor')]

	Return the field number of newly created field.
	"""

	# detect field number to be used for insertion:
	vals = rec.values()
	if vals != []:
	try:
	newfield_number = 1 + max([f[4] for v in vals for f in v])
	except ValueError:
	# vals could have been a list of empty lists, see test_add_delete_add_field_to_empty_record()
	newfield_number = 1
	else:
	newfield_number = 1

	# create new field object:
	if controlfield_value:
	newfield = ([], ind1, ind2, str(controlfield_value), newfield_number)
	else:
	newfield = (datafield_subfield_code_value_tuples, ind1, ind2, "", newfield_number)

	# add it to the record structure:
	if rec.has_key(tag):
	rec[tag].append(newfield)
	else:
	rec[tag] = [newfield]

	# return new field number:
	return newfield_number

	def record_delete_field(rec, tag, ind1="", ind2=""):
	"""
	delete all fields defined with marc tag 'tag' and indicators 'ind1' and 'ind2'
	from record 'rec'
	"""
	newlist = []
	if rec.has_key(tag):
	for field in rec[tag]:
	if not (field[1]==ind1 and field[2]==ind2):
	newlist.append(field)
	rec[tag] = newlist

	def tag_matches_pattern(tag, pattern):
	"""
	Returns true if MARC 'tag' matches a 'pattern'.

	'pattern' is plain text, with % as wildcard

	Both parameters must be 3 characters long strings.

	For e.g.
	>> tag_matches_pattern("909", "909") == True
	>> tag_matches_pattern("909", "9%9") == True
	>> tag_matches_pattern("909", "9%8") == False

	@param tag a 3 characters long string
	@param pattern a 3 characters long string
	@return False or True
	"""
	return (pattern[0] == '%' or tag[0] == pattern[0]) and \
	(pattern[1] == '%' or tag[1] == pattern[1]) and \
	(pattern[2] == '%' or tag[2] == pattern[2])

	def record_get_field_value(rec, tag, ind1="", ind2="", code=""):
	"""
	Returns first (string) value that matches specified field (tag, ind1, ind2, code)
	of the record (rec).

	Returns empty string if not found.

	Parameters (tag, ind1, ind2, code) can contain wildcard %.

	Difference between wildcard % and empty '':

	- Empty char specifies that we are not interested in a field which
	has one of the indicator(s)/subfield specified.

	- Wildcard specifies that we are interested in getting the value
	of the field whatever the indicator(s)/subfield is.

	For e.g. consider the following record in MARC:
	100C5 $$a val1
	555AB $$a val2
	555AB val3
	555 $$a val4
	555A val5

	>> record_get_field_value(record, '555', 'A', '', '')
	>> "val5"
	>> record_get_field_value(record, '555', 'A', '%', '')
	>> "val3"
	>> record_get_field_value(record, '555', 'A', '%', '%')
	>> "val2"
	>> record_get_field_value(record, '555', 'A', 'B', '')
	>> "val3"
	>> record_get_field_value(record, '555', '', 'B', 'a')
	>> ""
	>> record_get_field_value(record, '555', '', '', 'a')
	>> "val4"
	>> record_get_field_value(record, '555', '', '', '')
	>> ""
	>> record_get_field_value(record, '%%%', '%', '%', '%')
	>> "val1"


	@param rec a record structure as returned by create_record()
	@param tag a 3 characters long string
	@param ind1 a 1 character long string
	@param ind2 a 1 character long string
	@param code a 1 character long string
	@return string value (empty if nothing found)
	"""
	## Note: the code is quite redundant for speed reasons (avoid calling
	## functions or doing tests inside loops)

	if '%' in tag:
	#Wild card in tag. Must find all corresponding fields
	#fields_for_tag = (rec[field_tag] for field_tag in rec.keys() if tag_matches_pattern(field_tag, tag))
	if code == '':
	#Code not specified.
	for field_tag in rec.keys():
	if tag_matches_pattern(field_tag, tag):
	fields = rec[field_tag]
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	#Return matching field value if not empty
	if field[3] != "":
	return field[3]
	elif code == '%':
	#Code is wildcard. Take first subfield of first matching field
	for field_tag in rec.keys():
	if tag_matches_pattern(field_tag, tag):
	fields = rec[field_tag]
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2) and \
	(len(field[0]) > 0):
	return field[0][0][1]
	else:
	#Code is specified. Take corresponding one
	for field_tag in rec.keys():
	if tag_matches_pattern(field_tag, tag):
	fields = rec[field_tag]
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	for subfield in field[0]:
	if subfield[0] == code:
	return subfield[1]

	else:
	#Tag is completely specified. Use tag as dict key
	if rec.has_key(tag):
	fields = rec[tag]
	if code == '':
	#Code not specified.
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	#Return matching field value if not empty
	#or return "" empty if not exist.
	if field[3] != "":
	return field[3]

	elif code == '%':
	#Code is wildcard. Take first subfield of first matching field
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2) and \
	(len(field[0]) > 0):
	return field[0][0][1]
	else:
	#Code is specified. Take corresponding one
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	for subfield in field[0]:
	if subfield[0] == code:
	return subfield[1]
	#Nothing was found
	return ""

	def record_get_field_values(rec, tag, ind1="", ind2="", code=""):
	"""
	Returns the list of (string) values for the specified field (tag, ind1, ind2, code)
	of the record (rec).

	Returns empty list if not found.

	Parameters (tag, ind1, ind2, code) can contain wildcard %.

	@param rec a record structure as returned by create_record()
	@param tag a 3 characters long string
	@param ind1 a 1 character long string
	@param ind2 a 1 character long string
	@param code a 1 character long string
	@return a list of strings
	"""
	tmp = []

	if '%' in tag:
	# Wild card in tag. Must find all corresponding tags and fields
	keys = rec.keys()
	tags = [k for k in keys if tag_matches_pattern(k, tag)]
	if code == '' :
	#Code not specified. Consider field value (without subfields)
	for tag in tags:
	fields = rec[tag]
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2) and field[3] != '':
	tmp.append(field[3])
	elif code == '%':
	#Code is wildcard. Consider all subfields
	for tag in tags:
	fields = rec[tag]
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	for subfield in field[0]:
	tmp.append(subfield[1])
	else:
	#Code is specified. Consider all corresponding subfields
	for tag in tags:
	fields = rec[tag]
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	for subfield in field[0]:
	if subfield[0] == code:
	tmp.append(subfield[1])
	else:
	#Tag is completely specified. Use tag as dict key
	if rec.has_key(tag):
	fields = rec[tag]
	if code == '' :
	#Code not specified. Consider field value (without subfields)
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2) and field[3] != '':
	tmp.append(field[3])
	elif code == '%':
	#Code is wildcard. Consider all subfields
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	for subfield in field[0]:
	tmp.append(subfield[1])
	else:
	#Code is specified. Take corresponding one
	for field in fields:
	if (ind1 == '%' or field[1] == ind1) and \
	(ind2 == '%' or field[2] == ind2):
	for subfield in field[0]:
	if subfield[0] == code:
	tmp.append(subfield[1])

	#Nothing was found
	return tmp

	def print_rec(rec, format=1):
	"""prints a record
	format = 1 -- XML
	format = 2 -- HTML (not implemented)
	"""

	if format == 1:
	text = record_xml_output(rec)
	else:
	return ''

	return text

	def print_recs(listofrec, format=1):
	"""prints a list of records
	format = 1 -- XML
	format = 2 -- HTML (not implemented)
	if 'listofrec' is not a list it returns empty string
	"""
	text = ""

	if type(listofrec).__name__ !='list':
	return ""
	else:
	for rec in listofrec:
	text = "%s\n%s" % (text, print_rec(rec, format))
	return text

	def record_xml_output(rec):
	"""generates the XML for record 'rec' and returns it as a string"""
	xmltext = "<record>\n"
	if rec:
	# add the tag 'tag' to each field in rec[tag]
	fields = []
	for tag in rec.keys():
	for field in rec[tag]:
	fields.append((tag, field))
	record_order_fields(fields)
	for field in fields:
	xmltext += str(field_xml_output(field[1], field[0]))
	xmltext = "%s</record>" % xmltext
	return xmltext

	def records_xml_output(listofrec):
	"""generates the XML for the list of records 'listofrec' and returns it as a string"""
	xmltext = """<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE collection SYSTEM "file://%s">
	<collection>\n""" % CFG_MARC21_DTD

	for rec in listofrec:
	xmltext = "%s%s" % (xmltext, record_xml_output(rec))
	xmltext = "%s</collection>" % xmltext
	return xmltext

	def field_get_subfield_instances(field):
	"""returns the list of subfields associated with field 'field'"""
	return field[0]

	def field_get_subfield_values(field_instance, code):
	"""Return subfield CODE values of the field instance FIELD."""
	out = []
	for sf_code, sf_value in field_instance[0]:
	if sf_code == code:
	out.append(sf_value)
	return out

	def field_add_subfield(field, code, value):
	"""adds a subfield to field 'field'"""
	field[0].append(create_subfield(code, value))


	### IMPLEMENTATION / INVISIBLE FUNCTIONS

	def create_record_RXP(xmltext,
	verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
	correct=CFG_BIBRECORD_DEFAULT_CORRECT):
	"""
	creates a record object and returns it
	uses the RXP parser

	If verbose>3 then the parser will be strict and will stop in case of well-formedness errors
	or DTD errors
	If verbose=0, the parser will not give warnings
	If 0<verbose<=3, the parser will not give errors, but will warn the user about possible mistakes

	correct != 0 -> We will try to correct errors such as missing attributtes
	correct = 0 -> there will not be any attempt to correct errors

	"""

	record = {}
	global err

	ord = 1 # this is needed because of the record_xml_output function, where we need to know
	# the order of the fields


	TAG, ATTRS, CHILD_LIST = range(3)

	if verbose > 3:
	p = pyRXP.Parser(ErrorOnValidityErrors=1,
	ProcessDTD=1,
	ErrorOnUnquotedAttributeValues=1,
	warnCB = warnCB,
	srcName='string input')
	else:
	p = pyRXP.Parser(ErrorOnValidityErrors=0,
	ProcessDTD=1,
	ErrorOnUnquotedAttributeValues=0,
	warnCB = warnCB,
	srcName='string input')


	if correct:
	(rec, e) = wash(xmltext)
	err.extend(e)
	return (rec, e)


	root1 = p(xmltext) #root = (tagname, attr_dict, child_list, reserved)

	if root1[0] == 'collection':
	recs = [t for t in root1[CHILD_LIST] if type(t).__name__ == 'tuple' and t[TAG] == "record"]
	if recs != []:
	root = recs[0]
	else:
	root = None
	else:
	root = root1



	# get childs of 'controlfield'
	childs_controlfield = []
	if not root[2] == None:
	childs_controlfield = [t for t in root[CHILD_LIST] if type(t).__name__ == 'tuple' and t[TAG] == "controlfield"]

	# get childs of 'datafield'
	childs_datafield = []
	if not root[CHILD_LIST] == None:
	childs_datafield = [t for t in root[CHILD_LIST] if type(t).__name__ == 'tuple' and t[TAG] == "datafield"]

	for controlfield in childs_controlfield:
	s = controlfield[ATTRS]["tag"]
	value = ''
	if not controlfield == None:
	value = ''.join([n for n in controlfield[CHILD_LIST] if type(n).__name__ == 'str'])

	name = type(value).__name__

	if name in ["int", "long"] :
	st = str(value)
	elif name in ['str', 'unicode']:
	st = value
	else:
	if verbose:
	err.append((7, 'Type found: ' + name))
	st = "" # the type of value is not correct. (user insert something like a list...)


	field = ([], "", "", st, ord) #field = (subfields, ind1, ind2,value,ord)

	if record.has_key(s):
	record[s].append(field)
	else:
	record[s] = [field]

	ord = ord + 1

	for datafield in childs_datafield:

	#create list of subfields
	subfields = []

	childs_subfield = []
	if not datafield[CHILD_LIST] == None:
	childs_subfield = [t for t in datafield[CHILD_LIST] if type(t).__name__ == 'tuple' and t[0] == "subfield"]

	for subfield in childs_subfield:
	value = ''
	if not subfield == None:
	value = ''.join([n for n in subfield[CHILD_LIST] if type(n).__name__ == 'str'])
	#get_string_value(subfield)
	if subfield[ATTRS].has_key('code'):
	subfields.append((subfield[ATTRS]["code"], value))
	else:
	subfields.append(('!', value))

	#create field

	if datafield[ATTRS].has_key('tag'):
	s = datafield[ATTRS]["tag"]
	else:
	s = '!'

	if datafield[ATTRS].has_key('ind1'):
	ind1 = datafield[ATTRS]["ind1"]
	else:
	ind1 = '!'

	if datafield[ATTRS].has_key('ind2'):
	ind2 = datafield[ATTRS]["ind2"]
	else:
	ind2 = '!'

	field = (subfields, ind1, ind2, "", ord)

	if record.has_key(s):
	record[s].append(field)
	else:
	record[s] = [field]

	ord = ord+1

	return (record, err)

	def create_record_minidom(xmltext,
	verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
	correct=CFG_BIBRECORD_DEFAULT_CORRECT):
	"""
	creates a record object and returns it
	uses xml.dom.minidom
	"""

	record = {}
	ord = 1
	global err

	if correct:
	xmlt = xmltext
	(rec, e) = wash(xmlt, 0)
	err.extend(e)
	return (rec, err)

	dom = parseString(xmltext)
	root = dom.childNodes[0]

	for controlfield in get_childs_by_tag_name(root, "controlfield"):
	s = controlfield.getAttribute("tag")

	text_nodes = controlfield.childNodes
	v = u''.join([ n.data for n in text_nodes ]).encode("utf-8")

	name = type(v).__name__
	if (name in ["int", "long"]) :
	field = ([], "", "", str(v), ord) # field = (subfields, ind1, ind2,value)
	elif name in ['str', 'unicode']:
	field = ([], "", "", v, ord)
	else:
	if verbose:
	err.append((7, 'Type found: ' + name))

	field = ([], "", "", "", ord)# the type of value is not correct. (user insert something like a list...)

	if record.has_key(s):
	record[s].append(field)
	else:
	record[s] = [field]
	ord = ord + 1

	for datafield in get_childs_by_tag_name(root, "datafield"):
	subfields = []

	for subfield in get_childs_by_tag_name(datafield, "subfield"):
	text_nodes = subfield.childNodes
	v = u''.join([ n.data for n in text_nodes ]).encode("utf-8")
	code = subfield.getAttributeNS(None,'code').encode("utf-8")
	if code != '':
	subfields.append((code, v))
	else:
	subfields.append(('!', v))

	s = datafield.getAttribute("tag").encode("utf-8")
	if s == '':
	s = '!'

	ind1 = datafield.getAttribute("ind1").encode("utf-8")

	ind2 = datafield.getAttribute("ind2").encode("utf-8")

	if record.has_key(s):
	record[s].append((subfields, ind1, ind2, "", ord))
	else:
	record[s] = [(subfields, ind1, ind2, "", ord)]
	ord = ord + 1

	return (record, err)


	def create_record_4suite(xmltext,
	verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
	correct=CFG_BIBRECORD_DEFAULT_CORRECT):
	"""
	creates a record object and returns it
	uses 4Suite domlette
	"""

	record = {}
	global err

	if correct:
	xmlt = xmltext
	(rec, e) = wash(xmlt, 1)
	err.extend(e)
	return (rec, e)

	dom = NonvalidatingReader.parseString(xmltext, "urn:dummy")

	root = dom.childNodes[0]

	ord = 1
	for controlfield in get_childs_by_tag_name(root, "controlfield"):
	s = controlfield.getAttributeNS(None, "tag")

	text_nodes = controlfield.childNodes
	v = u''.join([n.data for n in text_nodes]).encode("utf-8")

	name = type(v).__name__
	if (name in ["int", "long"]) :
	field = ([], "", "", str(v), ord) # field = (subfields, ind1, ind2,value)
	elif name in ['str', 'unicode']:
	field = ([], "", "", v, ord)
	else:
	if verbose:
	err.append((7, 'Type found: ' + name))

	field = ([], "", "", "", ord)# the type of value is not correct. (user insert something like a list...)


	if record.has_key(s):
	record[s].append(field)
	else:
	record[s] = [field]
	ord = ord + 1

	for datafield in get_childs_by_tag_name(root, "datafield"):
	subfields = []

	for subfield in get_childs_by_tag_name(datafield, "subfield"):
	text_nodes = subfield.childNodes
	v = u''.join([n.data for n in text_nodes]).encode("utf-8")

	code = subfield.getAttributeNS(None, 'code').encode("utf-8")
	if code != '':
	subfields.append((code, v))
	else:
	subfields.append(('!', v))

	s = datafield.getAttributeNS(None, "tag").encode("utf-8")
	if s == '':
	s = '!'

	ind1 = datafield.getAttributeNS(None, "ind1").encode("utf-8")

	ind2 = datafield.getAttributeNS(None, "ind2").encode("utf-8")

	if record.has_key(s):
	record[s].append((subfields, ind1, ind2, "", ord))
	else:
	record[s] = [(subfields, ind1, ind2, "", ord)]
	ord = ord + 1

	return (record, err)

	def record_order_fields(rec, fun="order_by_ord"):
	"""orders field inside record 'rec' according to a function"""
	rec.sort(eval(fun))
	return

	def record_order_subfields(rec, fun="order_by_code"):
	"""orders subfield inside record 'rec' according to a function"""
	for tag in rec:
	for field in rec[tag]:
	field[0].sort(eval(fun))
	return

	def concat(alist):
	"""concats a list of lists"""
	newl = []
	for l in alist:
	newl.extend(l)
	return newl

	def create_subfield(code, value):
	"""Create a subfield object and return it."""
	if type(value).__name__ in ["int", "long"]:
	s = str(value)
	else:
	s = value
	subfield = (code, s)
	return subfield

	def field_xml_output(field, tag):
	"""generates the XML for field 'field' and returns it as a string"""
	xmltext = ""
	if field[3] != "":
	xmltext = "%s <controlfield tag=\"%s\">%s</controlfield>\n" % (xmltext, tag, encode_for_xml(field[3]))
	else:
	xmltext = "%s <datafield tag=\"%s\" ind1=\"%s\" ind2=\"%s\">\n" % (xmltext, tag, field[1], field[2])
	for subfield in field[0]:
	xmltext = "%s%s" % (xmltext, subfield_xml_output(subfield))
	xmltext = "%s </datafield>\n" % xmltext
	return xmltext

	def subfield_xml_output(subfield):
	"""generates the XML for a subfield object and return it as a string"""
	xmltext = " <subfield code=\"%s\">%s</subfield>\n" % (subfield[0], encode_for_xml(subfield[1]))
	return xmltext

	def order_by_ord(field1, field2):
	"""function used to order the fields according to their ord value"""
	return cmp(field1[1][4], field2[1][4])

	def order_by_code(subfield1, subfield2):
	"""function used to order the subfields according to their code value"""
	return cmp(subfield1[0], subfield2[0])

	def get_childs_by_tag_name(node, local):
	"""retrieves all childs from node 'node' with name 'local' and returns them as a list"""
	cNodes = list(node.childNodes)
	res = [child for child in cNodes if child.nodeName == local]
	return res

	def get_string_value(node):
	"""gets all child text nodes of node 'node' and returns them as a unicode string"""
	text_nodes = node.childNodes
	return u''.join([ n.data for n in text_nodes ])

	def get_childs_by_tag_name_RXP(listofchilds, tag):
	"""retrieves all childs from 'listofchilds' with tag name 'tag' and returns them as a list.
	listofchilds is a list returned by the RXP parser
	"""
	l = []
	if not listofchilds == None:
	l = [t for t in listofchilds if type(t).__name__ == 'tuple' and t[0] == tag]
	return l

	def getAttribute_RXP(root, attr):
	""" returns the attributte 'attr' from root 'root'
	root is a node returned by RXP parser
	"""
	try:
	return u''.join(root[1][attr])
	except KeyError:
	return ""

	def get_string_value_RXP(node):
	"""gets all child text nodes of node 'node' and returns them as a unicode string"""
	if not node == None:
	return ''.join([ n for n in node[2] if type(n).__name__ == 'str'])
	else:
	return ""

	def encode_for_xml(s):
	"Encode special chars in string so that it would be XML-compliant."
	s = string.replace(s, '&', '&')
	s = string.replace(s, '<', '<')
	return s

	def print_errors(alist):
	""" creates a unique string with the strings in list, using '\n' as a separator """
	text = ""
	for l in alist:
	text = '%s\n%s'% (text, l)
	return text

	def wash(xmltext, parser=2):
	"""
	Check the structure of the xmltext. Returns a record structure and a list of errors.
	parser = 1 - 4_suite
	parser = 2 - pyRXP
	parser = 0 - minidom
	"""

	errors = []
	i, e1 = tagclose('datafield', xmltext)
	j, e2 = tagclose('controlfield', xmltext)
	k, e3 = tagclose('subfield', xmltext)
	w, e4 = tagclose('record', xmltext)
	errors.extend(e1)
	errors.extend(e2)
	errors.extend(e3)
	errors.extend(e4)

	if i and j and k and w and parser > -1:
	if parser == 2:
	(rec, ee) = create_record_RXP(xmltext, 0, 0)
	elif parser == 1:
	(rec, ee) = create_record_4suite(xmltext, 0, 0)
	elif parser == 0:
	(rec, ee) = create_record_minidom(xmltext, 0, 0)
	else:
	(rec, ee) = (None, "ERROR: No usable XML parsers found.")
	else:
	return (None, errors)

	keys = rec.keys()

	for tag in keys:
	upper_bound = '999'
	n = len(tag)

	if n > 3:
	i = n-3
	while i > 0:
	upper_bound = '%s%s' % ('0', upper_bound)
	i = i - 1

	if tag == '!': # missing tag
	errors.append((1, '(field number(s): ' + ([f[4] for f in rec[tag]]).__str__() + ')'))
	v = rec[tag]
	rec.__delitem__(tag)
	rec['000'] = v
	tag = '000'
	elif not (("001" <= tag <= upper_bound) or \
	tag in ('FMT', 'FFT')):
	errors.append(2)
	v = rec[tag]
	rec.__delitem__(tag)
	rec['000'] = v
	tag = '000'

	fields = []
	for field in rec[tag]:
	if field[0] == [] and field[3] == '': ## datafield without any subfield
	errors.append((8,'(field number: ' + field[4].__str__() + ')'))

	subfields = []
	for subfield in field[0]:
	if subfield[0] == '!':
	errors.append((3,'(field number: ' + field[4].__str__() + ')'))
	newsub = ('', subfield[1])
	else:
	newsub = subfield
	subfields.append(newsub)

	if field[1] == '!':
	errors.append((4,'(field number: ' + field[4].__str__() + ')'))
	ind1 = ""
	else:
	ind1 = field[1]

	if field[2] == '!':
	errors.append((5,'(field number: ' + field[4].__str__() + ')'))
	ind2 = ""
	else:
	ind2 = field[2]

	newf = (subfields, ind1, ind2, field[3], field[4])
	fields.append(newf)

	rec[tag] = fields

	return (rec, errors)

	def tagclose(tagname, xmltext):
	""" checks if an XML document does not hae any missing tag with name tagname
	"""
	errors = []
	pat_open = '<' + tagname + '.*?>'
	pat_close = '</' + tagname + '>'
	p_open = re.compile(pat_open, re.DOTALL) # DOTALL - to ignore whitespaces
	p_close = re.compile(pat_close, re.DOTALL)
	list1 = p_open.findall(xmltext)
	list2 = p_close.findall(xmltext)

	if len(list1)!=len(list2):
	errors.append((99,'(Tagname : ' + tagname + ')'))
	return (0, errors)
	else:
	return (1, errors)

	def warning(code):
	""" It returns a warning message of code 'code'.
	If code = (cd, str) it returns the warning message of code 'cd'
	and appends str at the end"""

	ws = CFG_BIBRECORD_WARNING_MSGS
	s = ''

	if type(code).__name__ == 'str':
	return code

	if type(code).__name__ == 'tuple':
	if type(code[1]).__name__ == 'str':
	s = code[1]
	c = code[0]
	else:
	c = code
	if ws.has_key(c):
	return ws[c]+s
	else:
	return ""

	def warnings(l):
	"""it applies the function warning to every element in l"""
	alist = []
	for w in l:
	alist.append(warning(w))
	return alist

	if psycho_available == 1:
	#psyco.full()
	psyco.bind(wash)
	psyco.bind(create_record_4suite)
	psyco.bind(create_record_RXP)
	psyco.bind(create_record_minidom)
	psyco.bind(record_order_subfields)
	psyco.bind(field_get_subfield_values)
	psyco.bind(create_records)
	psyco.bind(create_record)
	psyco.bind(record_get_field_instances)
	psyco.bind(record_get_field_value)
	psyco.bind(record_get_field_values)

bibrecord.pyNo OneTemporaryActions

File Metadata

bibrecord.pyView Options

Event Timeline

bibrecord.py
No OneTemporary
Actions

bibrecord.py
View Options