diff --git a/modules/bibedit/Makefile.am b/modules/bibedit/Makefile.am
index 8a0968291..7f66ad19b 100644
--- a/modules/bibedit/Makefile.am
+++ b/modules/bibedit/Makefile.am
@@ -1,22 +1,22 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-SUBDIRS = doc lib
+SUBDIRS = bin doc etc lib
CLEANFILES = *~
\ No newline at end of file
diff --git a/modules/bibedit/bin/.cvsignore b/modules/bibedit/bin/.cvsignore
new file mode 100644
index 000000000..a573d515a
--- /dev/null
+++ b/modules/bibedit/bin/.cvsignore
@@ -0,0 +1,6 @@
+Makefile
+Makefile.in
+z_*
+*.O
+*~
+xmlmarclint
\ No newline at end of file
diff --git a/modules/bibedit/lib/Makefile.am b/modules/bibedit/bin/Makefile.am
similarity index 82%
copy from modules/bibedit/lib/Makefile.am
copy to modules/bibedit/bin/Makefile.am
index e717cdbae..7809b963d 100644
--- a/modules/bibedit/lib/Makefile.am
+++ b/modules/bibedit/bin/Makefile.am
@@ -1,29 +1,29 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-pylibdir = $(libdir)/python/cdsware
-pylib_DATA = bibrecord.py
+bin_SCRIPTS=xmlmarclint
FILESWML = $(wildcard $(srcdir)/*.wml)
EXTRA_DIST = $(FILESWML:$(srcdir)/%=%)
-CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc
+CLEANFILES = $(bin_SCRIPTS) *~ *.tmp xmlmarclintc
-%.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml
+%: %.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml $(top_srcdir)/config/cdswmllib.wml
$(WML) -o $@ $<
+ chmod u+x $@
diff --git a/modules/bibedit/bin/xmlmarclint.in b/modules/bibedit/bin/xmlmarclint.in
new file mode 100644
index 000000000..b14e1c70d
--- /dev/null
+++ b/modules/bibedit/bin/xmlmarclint.in
@@ -0,0 +1,130 @@
+## $Id$
+## xmlmarclint - XML MARC lint utility to check XML MARC files.
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+## read config variables:
+#include "config.wml"
+#include "configbis.wml"
+#include "cdswmllib.wml"
+
+## start Python:
+#!
+# -*- coding: utf-8 -*-
+## $Id$
+## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+
+"""
+XML MARC lint - check your XML MARC files
+"""
+
+import getopt
+import string
+import sys
+
+pylibdir = "/python"
+
+try:
+ import sys
+ sys.path.append('%s' % pylibdir)
+ from cdsware.bibrecord import *
+except ImportError, e:
+ print "Error: %s" % e
+ import sys
+ sys.exit(1)
+
+cmdusage = """command usage:
+ %s [-v ] xmlfile
+ or
+ %s --help
+ """ % (sys.argv[0], sys.argv[0])
+helpmsg = cmdusage
+
+try:
+ opts,args=getopt.getopt(sys.argv[1:], "c:v:h:",["-help"])
+except getopt.GetoptError:
+ print cmdusage
+ sys.exit(2)
+
+badrecords = []
+listofrecs=[]
+verbose= 0
+
+if len(args)==1:
+ xmlfile = args[0]
+elif len(args)==0:
+ if len(opts)==1:
+ if opts[0][0] in ['-help','-h']:
+ print helpmsg
+ else:
+ print cmdusage
+ sys.exit(2)
+else:
+ print cmdusage
+ sys.exit(2)
+
+for opt in opts:
+ if not opt[0] in ['-v']:
+ print cmdusage
+ sys.exit(2)
+
+ if opt[0] == '-v':
+ try:
+ verbose = string.atoi(opt[1])
+ except ValueError:
+ print 'Verbose must be an integer'
+ sys.exit(2)
+
+global parser
+try:
+ f = open(xmlfile,'r')
+ xmltext = f.read()
+ f.close()
+except IOError:
+ print 'File not found\n Please check the name'
+ import sys
+ sys.exit(1)
+
+parser = parser
+
+listofrecs = create_records(xmltext,0,1)
+badr = filter((lambda x: x[1]==0),listofrecs)
+badrecords = map((lambda x:x[0]),badr)
+
+
+s=''
+e=''
+
+if verbose:
+ if verbose <=3:
+ e=print_errors(concat(map((lambda x:x[2]),listofrecs)))
+ else:
+ s=print_recs(badrecords)
+ e=print_errors(concat(map((lambda x:x[2]),listofrecs)))
+else:
+ if badrecords !=[]:
+ print 'Bad records detected! For more information, set verbosity.'
+ sys.exit(1)
+
+if s!='' or e!='':
+ print s
+ print e
+ sys.exit(1)
+
+
+
diff --git a/modules/bibedit/bin/xmlmarclint.wml b/modules/bibedit/bin/xmlmarclint.wml
new file mode 100644
index 000000000..b14e1c70d
--- /dev/null
+++ b/modules/bibedit/bin/xmlmarclint.wml
@@ -0,0 +1,130 @@
+## $Id$
+## xmlmarclint - XML MARC lint utility to check XML MARC files.
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+## read config variables:
+#include "config.wml"
+#include "configbis.wml"
+#include "cdswmllib.wml"
+
+## start Python:
+#!
+# -*- coding: utf-8 -*-
+## $Id$
+## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+
+"""
+XML MARC lint - check your XML MARC files
+"""
+
+import getopt
+import string
+import sys
+
+pylibdir = "/python"
+
+try:
+ import sys
+ sys.path.append('%s' % pylibdir)
+ from cdsware.bibrecord import *
+except ImportError, e:
+ print "Error: %s" % e
+ import sys
+ sys.exit(1)
+
+cmdusage = """command usage:
+ %s [-v ] xmlfile
+ or
+ %s --help
+ """ % (sys.argv[0], sys.argv[0])
+helpmsg = cmdusage
+
+try:
+ opts,args=getopt.getopt(sys.argv[1:], "c:v:h:",["-help"])
+except getopt.GetoptError:
+ print cmdusage
+ sys.exit(2)
+
+badrecords = []
+listofrecs=[]
+verbose= 0
+
+if len(args)==1:
+ xmlfile = args[0]
+elif len(args)==0:
+ if len(opts)==1:
+ if opts[0][0] in ['-help','-h']:
+ print helpmsg
+ else:
+ print cmdusage
+ sys.exit(2)
+else:
+ print cmdusage
+ sys.exit(2)
+
+for opt in opts:
+ if not opt[0] in ['-v']:
+ print cmdusage
+ sys.exit(2)
+
+ if opt[0] == '-v':
+ try:
+ verbose = string.atoi(opt[1])
+ except ValueError:
+ print 'Verbose must be an integer'
+ sys.exit(2)
+
+global parser
+try:
+ f = open(xmlfile,'r')
+ xmltext = f.read()
+ f.close()
+except IOError:
+ print 'File not found\n Please check the name'
+ import sys
+ sys.exit(1)
+
+parser = parser
+
+listofrecs = create_records(xmltext,0,1)
+badr = filter((lambda x: x[1]==0),listofrecs)
+badrecords = map((lambda x:x[0]),badr)
+
+
+s=''
+e=''
+
+if verbose:
+ if verbose <=3:
+ e=print_errors(concat(map((lambda x:x[2]),listofrecs)))
+ else:
+ s=print_recs(badrecords)
+ e=print_errors(concat(map((lambda x:x[2]),listofrecs)))
+else:
+ if badrecords !=[]:
+ print 'Bad records detected! For more information, set verbosity.'
+ sys.exit(1)
+
+if s!='' or e!='':
+ print s
+ print e
+ sys.exit(1)
+
+
+
diff --git a/modules/bibedit/etc/.cvsignore b/modules/bibedit/etc/.cvsignore
new file mode 100644
index 000000000..a3409fca7
--- /dev/null
+++ b/modules/bibedit/etc/.cvsignore
@@ -0,0 +1,5 @@
+Makefile
+Makefile.in
+z_*
+*.O
+*~
\ No newline at end of file
diff --git a/modules/bibedit/etc/MARC21slim.dtd b/modules/bibedit/etc/MARC21slim.dtd
new file mode 100644
index 000000000..5edb404b0
--- /dev/null
+++ b/modules/bibedit/etc/MARC21slim.dtd
@@ -0,0 +1,61 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/modules/bibedit/Makefile.am b/modules/bibedit/etc/Makefile.am
similarity index 88%
copy from modules/bibedit/Makefile.am
copy to modules/bibedit/etc/Makefile.am
index 8a0968291..12bc9c459 100644
--- a/modules/bibedit/Makefile.am
+++ b/modules/bibedit/etc/Makefile.am
@@ -1,22 +1,25 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-SUBDIRS = doc lib
+etcdir = $(sysconfdir)/bibedit/
+etc_DATA = MARC21slim.dtd
-CLEANFILES = *~
\ No newline at end of file
+EXTRA_DIST = $(etc_DATA)
+
+CLEANFILES = *~ *.tmp
\ No newline at end of file
diff --git a/modules/bibedit/lib/Makefile.am b/modules/bibedit/lib/Makefile.am
index e717cdbae..737bbbef7 100644
--- a/modules/bibedit/lib/Makefile.am
+++ b/modules/bibedit/lib/Makefile.am
@@ -1,29 +1,29 @@
## $Id$
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
pylibdir = $(libdir)/python/cdsware
-pylib_DATA = bibrecord.py
+pylib_DATA = bibrecord.py bibrecord_config.py bibrecord_tests.py
FILESWML = $(wildcard $(srcdir)/*.wml)
EXTRA_DIST = $(FILESWML:$(srcdir)/%=%)
CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc
%.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml
$(WML) -o $@ $<
diff --git a/modules/bibedit/lib/bibrecord.py b/modules/bibedit/lib/bibrecord.py
index 7b7e0e3a3..cf966f357 100644
--- a/modules/bibedit/lib/bibrecord.py
+++ b/modules/bibedit/lib/bibrecord.py
@@ -1,280 +1,966 @@
## $Id$
-##
+
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-## read config variables:
-#include "config.wml"
-#include "configbis.wml"
-#include "cdswmllib.wml"
-
-## start Python:
-#!
# -*- coding: utf-8 -*-
## $Id$
## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+
"""
-BibRecord - xml data processing system for CDSware
+BibRecord - XML MARC processing library for CDSware
"""
-## fill config variables:
-pylibdir = "/python"
-
+### IMPORT INTERESTING MODULES AND XML PARSERS
+
## import interesting modules:
try:
import sys
- from xml.dom import Node
- from xml.sax.saxutils import escape
+ import re
from zlib import decompress
+ import_error = 0
except ImportError, e:
- print "Error: %s" % e
- import sys
- sys.exit(1)
-
+ import_error = 1
+ imperr = e
+
+## test available parsers:
try:
- from Ft.Xml.Domlette import NonvalidatingReader
- is4Suite = 1
-except ImportError,e :
- try:
- from xml.dom.minidom import parseString
- is4Suite = 0
- except ImportError,e :
- print "Error: %s" % e
- import sys
- sys.exit(1)
+ import sys
+ import string
+ err=[]
+except ImportError, e:
+ parser = -3
+ err1 = e
try:
- sys.path.append('%s' % pylibdir)
- from cdsware.config import *
- from cdsware.dbquery import run_sql
+ from bibrecord_config import *
+ verbose = cfg_bibrecord_default_verbose_level
+ correct = cfg_bibrecord_default_correct
+ parsers = cfg_bibrecord_parsers_available
except ImportError, e:
- print "Error: %s" % e
- import sys
- sys.exit(1)
+ parser = -2
+ verbose = 0
+ correct = 0
+ parsers = []
-def create_record(xmltext):
- """
- creates a record object and returns it
- uses 4Suite domlette if installed else uses xml.dom.minidom
- """
- global is4Suite
- try:
- if is4Suite:
- rec = create_record_4suite(xmltext)
+if parsers == []:
+ print 'No parser available'
+ sys.exit(2)
+else:
+ j,i=1,1
+ if 2 in parsers:
+ try:
+ import pyRXP
+ parser = 2
+ ## function to show the pyRXP_parser warnings ##
+ def warnCB(s):
+ """ function used to treat the PyRXP parser warnings"""
+ global err
+ err.append((0,'Parse warning:\n'+s))
+
+ err2 = ""
+ except ImportError,e :
+ err2=e
+ i=0
+ elif 1 in parsers:
+ try:
+ from Ft.Xml.Domlette import NonvalidatingReader
+ parser = 1
+ except ImportError,e :
+ err2=e
+ j=0
+ else:
+ try:
+ from xml.dom.minidom import parseString
+ parser = 0
+ except ImportError,e :
+ err2=e
+ parser = -1
+
+ if not i:
+ if 1 in parsers:
+ try:
+ from Ft.Xml.Domlette import NonvalidatingReader
+ parser = 1
+ except ImportError,e :
+ err2=e
+ j=0
+ elif 0 in parsers:
+ try:
+ from xml.dom.minidom import parseString
+ parser = 0
+ except ImportError,e :
+ err2=e
+ parser = -1
else:
- rec = create_record_minidom(xmltext)
- except:
- return None
- return rec
+ parser = -1
-def create_record_minidom(xmltext):
- """
- creates a record object and returns it
- uses xml.dom.minidom
- """
- record = {}
- record["fields"] = []
- dom = parseString(xmltext)
- root = dom.childNodes[0]
- for controlfield in get_childs_by_tag_name(root,"controlfield"):
- record["fields"].append(create_field(controlfield.getAttribute("tag"),get_string_value(controlfield)))
- for datafield in get_childs_by_tag_name(root,"datafield"):
- record["fields"].append(create_field(datafield.getAttribute("tag"),"",datafield.getAttribute("ind1"),datafield.getAttribute("ind2")))
- myfield = record["fields"][len(record["fields"])-1]
- for subfield in get_childs_by_tag_name(datafield,"subfield"):
- field_add_subfield(myfield,subfield.getAttribute("code"),get_string_value(subfield))
- return record
+ if not j:
+ if 0 in parsers:
+ try:
+ from xml.dom.minidom import parseString
+ parser = 0
+ except ImportError,e :
+ err2=e
+ parser = -1
+ else:
+ parser = -1
+
+### INTERFACE / VISIBLE FUNCTIONS
+
+def create_records(xmltext,verbose=verbose,correct=correct):
+ """
+ creates a list of records
+ """
+ global import_error
+ err=[]
+ if import_error == 1:
+ err.append((6,imperr))
+ else:
+ if sys.version >= '2.3':
+ pat = r".*?"
+ p = re.compile(pat,re.DOTALL) # DOTALL - to ignore whitespaces
+ list = p.findall(xmltext)
+ else:
+ l = xmltext.split('')
+ n=len(l)
+ ind = (l[n-1]).rfind('')
+ aux = l[n-1][:ind+9]
+ l[n-1] = aux
+ list=[]
+ for s in l:
+ if s != '':
+ i = -1
+ while (s[i].isspace()):
+ i=i-1
+ if i == -1:#in case there are no spaces at the end
+ i=len(s)-1
+ if s[:i+1].endswith(''):
+ list.append(''+s)
+ listofrec = map((lambda x:create_record(x,verbose,correct)),list)
+ return listofrec
+ return []
-def create_record_4suite(xmltext):
+# Record :: {tag : [Field]}
+# Field :: (Subfields,ind1,ind2,value)
+# Subfields :: [(code,value)]
+
+def create_record(xmltext,verbose = verbose, correct=correct):
"""
creates a record object and returns it
- uses 4Suite domlette
+ uses pyRXP if installed else uses 4Suite domlette or xml.dom.minidom
"""
- record = {}
- record["fields"] = []
- dom = NonvalidatingReader.parseString(xmltext,"urn:dummy")
- root = dom.childNodes[0]
- for controlfield in get_childs_by_tag_name(root,"controlfield"):
- record["fields"].append(create_field(controlfield.getAttributeNS(None,"tag"),get_string_value(controlfield)))
- for datafield in get_childs_by_tag_name(root,"datafield"):
- record["fields"].append(create_field(datafield.getAttributeNS(None,"tag"),"",datafield.getAttributeNS(None,"ind1"),datafield.getAttributeNS(None,"ind2")))
- myfield = record["fields"][len(record["fields"])-1]
- for subfield in get_childs_by_tag_name(datafield,"subfield"):
- field_add_subfield(myfield,subfield.getAttributeNS(None,"code"),get_string_value(subfield))
- return record
+ global parser
-def record_order_fields(rec,fun="order_by_tag"):
- """orders field inside record 'rec' according to a function"""
- rec["fields"].sort(eval(fun))
- return
+ (i,errors) = testImports(parser)
-def record_order_subfields(rec,fun="order_by_code"):
- """orders subfield inside record 'rec' according to a function"""
- for field in rec['fields']:
- field['subfields'].sort(eval(fun))
- return
+ if i==0:
+ return (None,0,errors)
+
+ try:
+ if parser==2:
+ ## the following is because of DTD validation
+ t = """
+
+ \n""" % cfg_marc21_dtd
+ t = "%s%s" % (t,xmltext)
+ t = "%s" % t
+ xmltext = t
+ (rec,er) = create_record_RXP(xmltext,verbose,correct)
+ elif parser:
+ (rec,er) = create_record_4suite(xmltext,verbose,correct)
+ else:
+ (rec,er) = create_record_minidom(xmltext,verbose,correct)
+
+ errs = warnings(er)
+ except:
+ errs = warnings(concat(err))
+ return (None,0,errs)
+ if errs == []:
+ return (rec,1,errs)
+ else:
+ return (rec,0,errs)
+
+
+
def record_get_field_instances(rec):
"""returns the list of fields stored in record 'rec'"""
- return rec["fields"]
+ return rec.items()
+
def record_has_field(rec,tag):
"""checks whether record 'rec' contains tag 'tag'"""
- for field in rec["fields"]:
- if field["tag"] == tag:
- return 1
- return 0
+ return rec.has_key(tag)
def record_add_field(rec,tag,value,ind1="",ind2=""):
"""
adds new field defined by the tag|value|ind1|ind2 parameters to record 'rec'
returns the new field
"""
- newfield = create_field(tag,value,ind1,ind2)
- rec["fields"].append(newfield)
- return newfield
+
+    # Field tuples are (subfields, ind1, ind2, value, ord); 'ord' is the key
+    # that record_xml_output sorts on to decide the output order of fields.
+    ords = [f[4] for fields in rec.values() for f in fields]
+    if ords:
+        # The new field must sort strictly after every existing field, so it
+        # gets the next free position rather than sharing the current maximum.
+        ord = max(ords) + 1
+    else:
+        # Empty record, or every tag list is empty (record_delete_field leaves
+        # an empty list behind): max() would fail, so start numbering at 1.
+        ord = 1
+    newfield = create_field(value,ind1,ind2,[],ord)
+
+    if rec.has_key(tag):
+        rec[tag].append(newfield)
+    else:
+        rec[tag] = [newfield]
+
+    return newfield
def record_delete_field(rec,tag,ind1="",ind2=""):
"""
delete all fields defined with marc tag 'tag' and indicators 'ind1' and 'ind2'
from record 'rec'
"""
newlist = []
- for field in rec["fields"]:
- if not (field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2):
- newlist.append(field)
- rec["fields"] = newlist
+ if rec.has_key(tag):
+ for field in rec[tag]:
+ if not (field[1]==ind1 and field[2]==ind2):
+ newlist.append(field)
+ rec[tag] = newlist
def record_get_field_value(rec,tag,ind1="",ind2="",code=""):
"""
retrieves the value of the first field containing tag 'tag' and indicators 'ind1' and 'ind2'
inside record 'rec'. Returns the found value as a string. If no matching field is found
returns the empty string.
+ If the tag contains a '%', it will retrieve the value of the first field whose tag starts with the characters before the '%'. The ind1, ind2 and code parameters will be ignored.
"""
- for field in rec["fields"]:
- if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2:
- if field["value"] != "":
- return field["value"]
- else:
- for subfield in field["subfields"]:
- if subfield["code"]==code:
- return subfield["value"]
+
+ s = tag.split('%')
+ if len(s) > 1:
+ t = s[0]
+ keys=rec.keys()
+ tags=[k for k in keys if k.startswith(t)]
+ for tag in tags:
+ fields = rec[tag]
+ for field in fields:
+ if field[3] != "":
+ return field[3]
+ else:
+ for subfield in field[0]:
+ return subfield[1]
+ else:
+ if rec.has_key(tag):
+ fields = rec[tag]
+ for field in fields:
+ if field[1]==ind1 and field[2]==ind2:
+ if field[3] != "":
+ return field[3]
+ else:
+ for subfield in field[0]:
+ if subfield[0]==code:
+ return subfield[1]
+
return ""
def record_get_field_values(rec,tag,ind1="",ind2="",code=""):
"""
retrieves the values of all the fields containing tag 'tag' and indicators 'ind1' and 'ind2'
inside record 'rec'. Returns the found values as a list. If no matching field is found
returns an empty list.
+ If the tag contains a '%', it will retrieve the values of all the fields whose tag starts with the characters before the '%'. The ind1, ind2 and code parameters will be ignored.
"""
tmp = []
- for field in rec["fields"]:
- if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2:
- if field["value"] != "":
- tmp.append(field["value"])
- else:
- for subfield in field["subfields"]:
- if subfield["code"]==code:
- tmp.append(subfield["value"])
+
+ s = tag.split('%')
+ if len(s) > 1:
+ t = s[0]
+ keys=rec.keys()
+ tags=[k for k in keys if k.startswith(t)]
+ for tag in tags:
+ fields = rec[tag]
+ for field in fields:
+ if field[3] != "":
+ tmp.append(field[3])
+ else:
+ for subfield in field[0]:
+ tmp.append(subfield[1])
+ else:
+ if rec.has_key(tag):
+ fields = rec[tag]
+ for field in fields:
+ if field[1]==ind1 and field[2]==ind2:
+ if field[3] != "":
+ tmp.append(field[3])
+ else:
+ for subfield in field[0]:
+ if subfield[0]==code:
+ tmp.append(subfield[1])
+
return tmp
+def print_rec(rec,format=1):
+    """returns the textual form of record 'rec'
+    format = 1 -- XML (produced by record_xml_output)
+    format = 2 -- HTML (not implemented)
+    any format other than 1 yields the empty string
+    """
+
+    # guard clause: only the XML format is implemented
+    if format != 1:
+        return ''
+
+    return record_xml_output(rec)
+
+def print_recs(listofrec,format=1):
+    """returns the textual form of every record in 'listofrec'
+    format = 1 -- XML
+    format = 2 -- HTML (not implemented)
+    each record is rendered with print_rec and preceded by a newline
+    if 'listofrec' is not a list it returns empty string
+    """
+    # guard clause: anything that is not a plain list produces no output
+    if type(listofrec).__name__ !='list':
+        return ""
+
+    # one "\n<record text>" chunk per record, glued together in order
+    chunks = ["\n%s" % print_rec(rec,format) for rec in listofrec]
+    return "".join(chunks)
+
def record_xml_output(rec):
"""generates the XML for record 'rec' and returns it as a string"""
xmltext = "\n"
- record_order_fields(rec)
- for field in rec["fields"]:
- xmltext = "%s%s" % (xmltext,field_xml_output(field))
+ #add the tag 'tag' to each field in rec[tag]
+ fields=[]
+ for tag in rec.keys():
+ for field in rec[tag]:
+ fields.append((tag,field))
+
+ record_order_fields(fields)
+
+ for field in fields:
+ xmltext = "%s%s" % (xmltext,field_xml_output(field[1],field[0]))#field[0]=tag
xmltext = "%s" % xmltext
return xmltext
+
+def records_xml_output(listofrec):
+ """generates the XML for the list of records 'listofrec' and returns it as a string"""
+ xmltext = """
+
+ \n""" % cfg_marc21_dtd
+
+ for rec in listofrec:
+ xmltext = "%s%s" % (xmltext, record_xml_output(rec))
+ xmltext = "%s" % xmltext
+ return xmltext
+
+def field_get_subfield_instances(field):
+    """returns the subfield list (element 0 of the field tuple) of field 'field'"""
+    subfields = field[0]
+    return subfields
+
+def field_add_subfield(field,code,value):
+    """appends a new (code, value) subfield to the subfield list of field 'field'"""
+    new_subfield = create_subfield(code,value)
+    field[0].append(new_subfield)
+
+
+### IMPLEMENTATION / INVISIBLE FUNCTIONS
+
+def create_record_RXP(xmltext, verbose=verbose, correct=correct):
+ """
+ creates a record object and returns it
+ uses the RXP parser
+
+ If verbose>3 then the parser will be strict and will stop in case of well-formedness errors
+ or DTD errors
+ If verbose=0, the parser will not give warnings
+ If 0 < verbose <= 3, the parser is not strict; correct = 1 -> we will try to correct errors such as missing attributes
+ correct = 0 -> there will not be any attempt to correct errors
+
+ """
+
+ record = {}
+ global err
+
+ ord = 1 # this is needed because of the record_xml_output function, where we need to know
+ # the order of the fields
+
+
+ TAG, ATTRS,CHILD_LIST = range(3)
+
+ if verbose > 3:
+ p = pyRXP.Parser(ErrorOnValidityErrors=1,
+ ProcessDTD=1,
+ ErrorOnUnquotedAttributeValues=1,
+ warnCB = warnCB,
+ srcName='string input')
+ else:
+ p = pyRXP.Parser(ErrorOnValidityErrors=0,
+ ProcessDTD=1,
+ ErrorOnUnquotedAttributeValues=0,
+ warnCB = warnCB,
+ srcName='string input')
+
+
+ if correct:
+ (rec,e) = wash(xmltext)
+ err.extend(e)
+ return (rec,e)
+
+
+ root1=p(xmltext) #root = (tagname, attr_dict, child_list, reserved)
+
+ if root1[0]=='collection':
+ recs = [t for t in root1[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="record"]
+ if recs !=[]:
+ root = recs[0]
+ else:
+ root = None
+ else:
+ root=root1
+
+
+
+ # get childs of 'controlfield'
+ childs_controlfield = []
+ if not root[2]==None:
+ childs_controlfield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="controlfield"]
+
+ # get childs of 'datafield'
+ childs_datafield = []
+ if not root[CHILD_LIST]==None:
+ childs_datafield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="datafield"]
+
+ for controlfield in childs_controlfield:
+ s=controlfield[ATTRS]["tag"]
+ value=''
+ if not controlfield==None:
+ value=''.join([ n for n in controlfield[CHILD_LIST] if type(n).__name__ == 'str'])
+
+ name = type(value).__name__
+
+ if name in ["int","long"] :
+ st = str(value)
+ elif name == 'str':
+ st = value
+ else:
+ if verbose:
+ err.append((7,'Type found: ' + name))
+ st = "" # the type of value is not correct. (user insert something like a list...)
+
+
+ field = ([],"","",st,ord) #field = (subfields, ind1, ind2,value,ord)
+
+ if record.has_key(s):
+ record[s].append(field)
+ else:
+ record[s]=[field]
+
+ ord = ord+1
+
+ for datafield in childs_datafield:
+
+ #create list of subfields
+ subfields = []
+
+ childs_subfield = []
+ if not datafield[CHILD_LIST]==None:
+ childs_subfield =[t for t in datafield[CHILD_LIST] if type(t).__name__=='tuple' and t[0]=="subfield"]
+
+ for subfield in childs_subfield:
+ value=''
+ if not subfield==None:
+ value=''.join([ n for n in subfield[CHILD_LIST] if type(n).__name__ == 'str'])
+ #get_string_value(subfield)
+ if subfield[ATTRS].has_key('code'):
+ subfields.append((subfield[ATTRS]["code"],value))
+ else:
+ subfields.append(('!',value))
+
+ #create field
+
+ if datafield[ATTRS].has_key('tag'):
+ s = datafield[ATTRS]["tag"]
+ else:
+ s = '!'
+
+ if datafield[ATTRS].has_key('ind1'):
+ ind1 = datafield[ATTRS]["ind1"]
+ else:
+ ind1 = '!'
+
+ if datafield[ATTRS].has_key('ind2'):
+ ind2 = datafield[ATTRS]["ind2"]
+ else:
+ ind2 = '!'
+
+ field = (subfields,ind1,ind2,"",ord)
+
+ if record.has_key(s):
+ record[s].append(field)
+ else:
+ record[s]=[field]
+
+ ord = ord+1
+
+ return (record,err)
+
+
-def create_field(tag,value,ind1="",ind2=""):
+def create_record_minidom(xmltext, verbose=verbose, correct=correct):
+    """
+    creates a record object from the XML MARC text 'xmltext' using
+    xml.dom.minidom and returns the tuple (record, errors)
+
+    record :: {tag : [(subfields, ind1, ind2, value, ord)]}
+    subfields is a list of (code, value) pairs; ord is the position of the
+    field, counted from 1, controlfields first and datafields after them
+
+    if 'correct' is true the text is handed over to wash() and nothing is
+    parsed here
+    if 'verbose' is true, a (7, message) entry is added to the module-level
+    'err' list for each controlfield whose value has an unexpected type
+    a missing datafield tag or subfield code is stored as '!'
+    """
+
+    record = {}
+    ord=1
+    global err
+
+    if correct:
+        xmlt = xmltext
+        (rec,e) = wash(xmlt,0)
+        # NOTE(review): the create_record_* variants disagree here (append vs
+        # extend, returning err vs e) - confirm against wash() and warnings()
+        err.append(e)
+        return (rec,err)
+
+    dom = parseString(xmltext)
+    root = dom.childNodes[0]
+
+    for controlfield in get_childs_by_tag_name(root,"controlfield"):
+        s = controlfield.getAttribute("tag")
+
+        text_nodes = controlfield.childNodes
+        v = u''.join([ n.data for n in text_nodes ])
+
+        name = type(v).__name__
+        if (name in ["int","long"]) :
+            field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value)
+        elif name in ['str','unicode']:
+            # u''.join() always returns a unicode object, so 'unicode' has to be
+            # accepted here; otherwise every controlfield value would be dropped.
+            # Subfield values below are stored as unicode in the same way.
+            field = ([],"","",v,ord)
+        else:
+            if verbose:
+                err.append((7,'Type found: ' + name))
+
+            field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...)
+
+        if record.has_key(s):
+            record[s].append(field)
+        else:
+            record[s]=[field]
+        ord=ord+1
+
+    for datafield in get_childs_by_tag_name(root,"datafield"):
+        subfields = []
+
+        for subfield in get_childs_by_tag_name(datafield,"subfield"):
+            text_nodes = subfield.childNodes
+            v = u''.join([ n.data for n in text_nodes ])
+            code = subfield.getAttributeNS(None,'code')
+            if code != '':
+                subfields.append((code,v))
+            else:
+                # '!' marks a subfield whose code attribute is missing
+                subfields.append(('!',v))
+
+        s = datafield.getAttribute("tag")
+        if s == '':
+            # '!' marks a datafield whose tag attribute is missing
+            s = '!'
+
+        ind1 = datafield.getAttribute("ind1")
+
+        ind2 = datafield.getAttribute("ind2")
+
+
+        if record.has_key(s):
+            record[s].append((subfields,ind1,ind2,"",ord))
+        else:
+            record[s]=[(subfields,ind1,ind2,"",ord)]
+        ord=ord+1
+
+    return (record,err)
+
+
+def create_record_4suite(xmltext,verbose=verbose,correct=correct):
+    """
+    creates a record object from the XML MARC text 'xmltext' using the
+    4Suite domlette NonvalidatingReader and returns the tuple (record, errors)
+
+    record :: {tag : [(subfields, ind1, ind2, value, ord)]}
+    subfields is a list of (code, value) pairs; ord is the position of the
+    field, counted from 1, controlfields first and datafields after them
+
+    if 'correct' is true the text is handed over to wash() and nothing is
+    parsed here
+    if 'verbose' is true, a (7, message) entry is added to the module-level
+    'err' list for each controlfield whose value has an unexpected type
+    a missing datafield tag or subfield code is stored as '!'
+    """
+    record = {}
+    global err
+
+    if correct:
+        xmlt = xmltext
+        (rec,e) = wash(xmlt,1)
+        # NOTE(review): the create_record_* variants disagree here (append vs
+        # extend, returning err vs e) - confirm against wash() and warnings()
+        err.append(e)
+        return (rec,e)
+
+    dom = NonvalidatingReader.parseString(xmltext,"urn:dummy")
+
+    root = dom.childNodes[0]
+
+    ord=1
+
+    for controlfield in get_childs_by_tag_name(root,"controlfield"):
+        s = controlfield.getAttributeNS(None,"tag")
+
+        text_nodes = controlfield.childNodes
+        v = u''.join([ n.data for n in text_nodes ])
+
+        name = type(v).__name__
+        if (name in ["int","long"]) :
+            field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value)
+        elif name in ['str','unicode']:
+            # u''.join() always returns a unicode object, so 'unicode' has to be
+            # accepted here; otherwise every controlfield value would be dropped.
+            # Subfield values below are stored as unicode in the same way.
+            field = ([],"","",v,ord)
+        else:
+            if verbose:
+                err.append((7,'Type found: ' + name))
+
+            field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...)
+
+
+        if record.has_key(s):
+            record[s].append(field)
+        else:
+            record[s]=[field]
+        ord=ord+1
+
+
+    for datafield in get_childs_by_tag_name(root,"datafield"):
+        subfields = []
+
+        for subfield in get_childs_by_tag_name(datafield,"subfield"):
+            text_nodes = subfield.childNodes
+            v = u''.join([ n.data for n in text_nodes ])
+
+            code = subfield.getAttributeNS(None,'code')
+            if code != '':
+                subfields.append((code,v))
+            else:
+                # '!' marks a subfield whose code attribute is missing
+                subfields.append(('!',v))
+
+        s = datafield.getAttributeNS(None,"tag")
+        if s == '':
+            # '!' marks a datafield whose tag attribute is missing
+            s = '!'
+
+        ind1 = datafield.getAttributeNS(None,"ind1")
+
+        ind2 = datafield.getAttributeNS(None,"ind2")
+
+
+        if record.has_key(s):
+            record[s].append((subfields,ind1,ind2,"",ord))
+        else:
+            record[s]=[(subfields,ind1,ind2,"",ord)]
+        ord=ord+1
+
+    return (record,err)
+
+
+
+
+def record_order_fields(rec,fun="order_by_ord"):
+    """orders, in place, the list 'rec' according to a comparison function
+
+    'rec' must be a list: record_xml_output passes a list of (tag, field)
+    pairs, which is what the default comparator order_by_ord expects
+    'fun' is either a cmp-style function or the name of one as a string
+    """
+    if callable(fun):
+        compare = fun
+    else:
+        # NOTE(review): eval() runs whatever expression the string holds; only
+        # pass trusted comparator names here, never user-supplied text.
+        compare = eval(fun)
+    rec.sort(compare)
+    return
+
+def record_order_subfields(rec,fun="order_by_code"):
+    """orders, in place, the subfields of every field of record 'rec'
+
+    'rec' maps each tag to a list of field tuples whose element 0 is the
+    list of (code, value) subfields
+    'fun' is either a cmp-style function or the name of one as a string
+    """
+    # resolve the comparator once instead of once per field
+    if callable(fun):
+        compare = fun
+    else:
+        # NOTE(review): eval() runs whatever expression the string holds; only
+        # pass trusted comparator names here, never user-supplied text.
+        compare = eval(fun)
+    for tag in rec:
+        for field in rec[tag]:
+            field[0].sort(compare)
+    return
+
+
+
+def concat(list):
+    """flattens a list of lists by one level and returns the new list"""
+    flat = []
+    for sublist in list:
+        flat += sublist
+    return flat
+
+
+
+
+def create_field(value,ind1="",ind2="",subfields=[],ord=-1):
""" creates a field object and returns it"""
- field = {}
- field["subfields"] = []
- field["tag"] = tag
- field["ind1"] = ind1
- field["ind2"] = ind2
- if type(value).__name__ in ["int","long"] :
- field["value"] = str(value)
+
+ name = type(value).__name__
+ if name in ["int","long"] :
+ s = str(value)
+ elif name == 'str':
+ s = value
else:
- field["value"] = value
- return field
+ err.append((7,'Type found: ' + name))
+ s=""
+
-def field_get_subfield_instances(field):
- """returns the list of subfields associated with field 'field'"""
- return field["subfields"]
+ field = (subfields,ind1,ind2,s,ord)
+ return field
+
def field_add_subfield(field,code,value):
"""adds a subfield to field 'field'"""
- field["subfields"].append(create_subfield(code,value))
+ field[0].append(create_subfield(code,value))
-def field_xml_output(field):
+def field_xml_output(field,tag):
"""generates the XML for field 'field' and returns it as a string"""
xmltext = ""
- if field["value"] != "":
- xmltext = "%s %s\n" % (xmltext,field["tag"],escape(field["value"]))
+ if field[3] != "":
+ xmltext = "%s %s\n" % (xmltext,tag,encode_for_xml(field[3]))
else:
- xmltext = "%s \n" % (xmltext,field["tag"],field["ind1"],field["ind2"])
- for subfield in field["subfields"]:
+ xmltext = "%s \n" % (xmltext,tag,field[1],field[2])
+ for subfield in field[0]:
xmltext = "%s%s" % (xmltext,subfield_xml_output(subfield))
xmltext = "%s \n" % xmltext
return xmltext
def create_subfield(code,value):
""" creates a subfield object and returns it"""
- subfield = {}
- subfield["code"] = code
if type(value).__name__ in ["int","long"]:
- subfield["value"] = str(value)
+ s = str(value)
else:
- subfield["value"] = value
+ s = value
+ subfield = (code, s)
+
return subfield
def subfield_xml_output(subfield):
"""generates the XML for a subfield object and return it as a string"""
- xmltext = " %s\n" % (subfield["code"],escape(subfield["value"]))
+ xmltext = " %s\n" % (subfield[0],encode_for_xml(subfield[1]))
return xmltext
-def order_by_tag(field1, field2):
- """function used to order the fields according to their tag value"""
- return cmp(field1["tag"], field2["tag"])
+def order_by_ord(field1, field2):
+ """function used to order the fields according to their ord value"""
+ return cmp(field1[1][4], field2[1][4])
def order_by_code(subfield1,subfield2):
"""function used to order the subfields according to their code value"""
- return cmp(subfield1['code'],subfield2['code'])
+ return cmp(subfield1[0],subfield2[0])
def get_childs_by_tag_name(node, local):
"""retrieves all childs from node 'node' with name 'local' and returns them as a list"""
- res=[]
- for child in node.childNodes:
- if child.localName == local:
- res.append(child)
+ cNodes = list(node.childNodes)
+ res = [child for child in cNodes if child.nodeName==local]
return res
def get_string_value(node):
"""gets all child text nodes of node 'node' and returns them as a unicode string"""
text_nodes = node.childNodes
return u''.join([ n.data for n in text_nodes ])
-def db_get_xml_record(recid,format='xm'):
- """extracts record #newid xml representation from the db and returns it as a string"""
- res = run_sql("SELECT value FROM bibfmt WHERE format=%s and id_bibrec=%s",(format,recid,))
- if len(res) > 0:
- return decompress(res[0][0])
- return ""
+
+
+def get_childs_by_tag_name_RXP(listofchilds,tag):
+ """retrieves all childs from 'listofchilds' with tag name 'tag' and returns them as a list.
+ listofchilds is a list returned by the RXP parser
+ """
+ l=[]
+ if not listofchilds==None:
+ l =[t for t in listofchilds if type(t).__name__=='tuple' and t[0]==tag]
+
+ return l
+
+
+def getAttribute_RXP(root, attr):
+ """ returns the attribute 'attr' from root 'root'
+ root is a node returned by RXP parser
+ """
+ try:
+ return u''.join(root[1][attr])
+ except KeyError,e:
+ return ""
+
+
+def get_string_value_RXP(node):
+ """gets all child text nodes of node 'node' and returns them as a unicode string"""
+ if not node==None:
+ return ''.join([ n for n in node[2] if type(n).__name__ == 'str'])
+ else:
+ return ""
+
+
+def encode_for_xml(s):
+ "Encode special chars in string so that it would be XML-compliant."
+ s = string.replace(s, '&', '&')
+ s = string.replace(s, '<', '<')
+ s = string.replace(s, '>', '>')
+ return s
+
+
+def print_errors(list):
+ """ creates a single string with the strings in list, using '\n' as a separator """
+ text=""
+
+ for l in list:
+ text = '%s\n%s'% (text,l)
+
+ return text
+
+
+
+def wash(xmltext, parser=2):
+ """
+ Check the structure of the xmltext. Returns a record structure and a list of errors.
+ parser = 1 - 4_suite
+ parser = 2 - pyRXP
+ parser = 0 - minidom
+ """
+
+ errors=[]
+ i,e1 = tagclose('datafield',xmltext)
+ j,e2 = tagclose('controlfield',xmltext)
+ k,e3 = tagclose('subfield',xmltext)
+ w,e4 = tagclose('record',xmltext)
+ errors.extend(e1)
+ errors.extend(e2)
+ errors.extend(e3)
+ errors.extend(e4)
+
+ if i and j and k and w and parser!=-3:
+ if parser==1:
+ (rec,ee) = create_record_4suite(xmltext,0,0)
+ elif parser==2:
+ (rec,ee) = create_record_RXP(xmltext,0,0)
+ else:
+ (rec,ee) = create_record_minidom(xmltext,0,0)
+ else:
+ return (None,errors)
-
\ No newline at end of file
+
+
+ keys = rec.keys()
+
+ for tag in keys:
+ upper_bound = '999'
+ n = len(tag)
+
+ if n>3:
+ i=n-3
+ while i>0:
+ upper_bound = '%s%s' % ('0',upper_bound)
+ i = i-1
+
+ if tag == '!': # missing tag
+ errors.append((1, '(field number(s): ' + ([f[4] for f in rec[tag]]).__str__()+')'))
+ v=rec[tag]
+ rec.__delitem__(tag)
+ rec['000'] = v
+ tag = '000'
+ elif not ("001" <= tag <=upper_bound):
+ errors.append(2)
+ v = rec[tag]
+ rec.__delitem__(tag)
+ rec['000'] = v
+ tag = '000'
+
+ fields =[]
+ for field in rec[tag]:
+ if field[0]==[] and field[3]=='': ## datafield without any subfield
+ errors.append((8,'(field number: '+field[4].__str__()+')'))
+
+ subfields=[]
+ for subfield in field[0]:
+ if subfield[0]=='!':
+ errors.append((3,'(field number: '+field[4].__str__()+')'))
+ newsub = ('',subfield[1])
+ else:
+ newsub = subfield
+ subfields.append(newsub)
+
+ if field[1]=='!':
+ errors.append((4,'(field number: '+field[4].__str__()+')'))
+ ind1 = ""
+ else:
+ ind1 = field[1]
+
+ if field[2]=='!':
+ errors.append((5,'(field number: '+field[4].__str__()+')'))
+ ind2 = ""
+ else:
+ ind2=field[2]
+
+ newf = (subfields,ind1,ind2,field[3],field[4])
+ fields.append(newf)
+
+ rec[tag]=fields
+
+ return (rec,errors)
+
+
+def tagclose(tagname,xmltext):
+ """ checks if an XML document does not have any missing tag with name tagname
+ """
+ import re
+ errors=[]
+ pat_open = '<'+tagname+'.*?>'
+ pat_close = ''+tagname+'>'
+ p_open = re.compile(pat_open,re.DOTALL) # DOTALL - so that '.' also matches newlines
+ p_close = re.compile(pat_close,re.DOTALL)
+ list1 = p_open.findall(xmltext)
+ list2 = p_close.findall(xmltext)
+
+ if len(list1)!=len(list2):
+ errors.append((99,'(Tagname : ' + tagname + ')'))
+ return (0,errors)
+ else:
+ return (1,errors)
+
+
+
+def testImports(c):
+ """ Test if the import statements did not fail"""
+ errors=[]
+ global err1,err2
+
+ if c==-1:
+ i = 0
+ errors.append((6,err2))
+ elif c == -3:
+ i=0
+ errors.append((6,err1))
+ else:
+ i=1
+ return (i,errors)
+
+
+def warning(code):
+ """ It returns a warning message of code 'code'.
+ If code = (cd, str) it returns the warning message of code 'cd'
+ and appends str at the end"""
+
+ ws = cfg_bibrecord_warning_msgs
+ s=''
+
+ if type(code).__name__ == 'str':
+ return code
+
+ if type(code).__name__ == 'tuple':
+ if type(code[1]).__name__ == 'str':
+ s = code[1]
+ c = code[0]
+ else:
+ c = code
+ if ws.has_key(c):
+ return ws[c]+s
+ else:
+ return ""
+
+def warnings(l):
+ """it applies the function warning to every element in l"""
+ list = []
+ for w in l:
+ list.append(warning(w))
+ return list
+
diff --git a/modules/bibedit/lib/bibrecord.py.wml b/modules/bibedit/lib/bibrecord.py.wml
index 7b7e0e3a3..cf966f357 100644
--- a/modules/bibedit/lib/bibrecord.py.wml
+++ b/modules/bibedit/lib/bibrecord.py.wml
@@ -1,280 +1,966 @@
## $Id$
-##
+
## This file is part of the CERN Document Server Software (CDSware).
## Copyright (C) 2002 CERN.
##
## The CDSware is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## The CDSware is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDSware; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
-## read config variables:
-#include "config.wml"
-#include "configbis.wml"
-#include "cdswmllib.wml"
-
-## start Python:
-#!
# -*- coding: utf-8 -*-
## $Id$
## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.
+
"""
-BibRecord - xml data processing system for CDSware
+BibRecord - XML MARC processing library for CDSware
"""
-## fill config variables:
-pylibdir = "/python"
-
+### IMPORT INTERESTING MODULES AND XML PARSERS
+
## import interesting modules:
try:
import sys
- from xml.dom import Node
- from xml.sax.saxutils import escape
+ import re
from zlib import decompress
+ import_error = 0
except ImportError, e:
- print "Error: %s" % e
- import sys
- sys.exit(1)
-
+ import_error = 1
+ imperr = e
+
+## test available parsers:
try:
- from Ft.Xml.Domlette import NonvalidatingReader
- is4Suite = 1
-except ImportError,e :
- try:
- from xml.dom.minidom import parseString
- is4Suite = 0
- except ImportError,e :
- print "Error: %s" % e
- import sys
- sys.exit(1)
+ import sys
+ import string
+ err=[]
+except ImportError, e:
+ parser = -3
+ err1 = e
try:
- sys.path.append('%s' % pylibdir)
- from cdsware.config import *
- from cdsware.dbquery import run_sql
+ from bibrecord_config import *
+ verbose = cfg_bibrecord_default_verbose_level
+ correct = cfg_bibrecord_default_correct
+ parsers = cfg_bibrecord_parsers_available
except ImportError, e:
- print "Error: %s" % e
- import sys
- sys.exit(1)
+ parser = -2
+ verbose = 0
+ correct = 0
+ parsers = []
-def create_record(xmltext):
- """
- creates a record object and returns it
- uses 4Suite domlette if installed else uses xml.dom.minidom
- """
- global is4Suite
- try:
- if is4Suite:
- rec = create_record_4suite(xmltext)
+if parsers == []:
+ print 'No parser available'
+ sys.exit(2)
+else:
+ j,i=1,1
+ if 2 in parsers:
+ try:
+ import pyRXP
+ parser = 2
+ ## function to show the pyRXP_parser warnings ##
+ def warnCB(s):
+ """ function used to treat the PyRXP parser warnings"""
+ global err
+ err.append((0,'Parse warning:\n'+s))
+
+ err2 = ""
+ except ImportError,e :
+ err2=e
+ i=0
+ elif 1 in parsers:
+ try:
+ from Ft.Xml.Domlette import NonvalidatingReader
+ parser = 1
+ except ImportError,e :
+ err2=e
+ j=0
+ else:
+ try:
+ from xml.dom.minidom import parseString
+ parser = 0
+ except ImportError,e :
+ err2=e
+ parser = -1
+
+ if not i:
+ if 1 in parsers:
+ try:
+ from Ft.Xml.Domlette import NonvalidatingReader
+ parser = 1
+ except ImportError,e :
+ err2=e
+ j=0
+ elif 0 in parsers:
+ try:
+ from xml.dom.minidom import parseString
+ parser = 0
+ except ImportError,e :
+ err2=e
+ parser = -1
else:
- rec = create_record_minidom(xmltext)
- except:
- return None
- return rec
+ parser = -1
-def create_record_minidom(xmltext):
- """
- creates a record object and returns it
- uses xml.dom.minidom
- """
- record = {}
- record["fields"] = []
- dom = parseString(xmltext)
- root = dom.childNodes[0]
- for controlfield in get_childs_by_tag_name(root,"controlfield"):
- record["fields"].append(create_field(controlfield.getAttribute("tag"),get_string_value(controlfield)))
- for datafield in get_childs_by_tag_name(root,"datafield"):
- record["fields"].append(create_field(datafield.getAttribute("tag"),"",datafield.getAttribute("ind1"),datafield.getAttribute("ind2")))
- myfield = record["fields"][len(record["fields"])-1]
- for subfield in get_childs_by_tag_name(datafield,"subfield"):
- field_add_subfield(myfield,subfield.getAttribute("code"),get_string_value(subfield))
- return record
+ if not j:
+ if 0 in parsers:
+ try:
+ from xml.dom.minidom import parseString
+ parser = 0
+ except ImportError,e :
+ err2=e
+ parser = -1
+ else:
+ parser = -1
+
+### INTERFACE / VISIBLE FUNCTIONS
+
+def create_records(xmltext,verbose=verbose,correct=correct):
+ """
+ creates a list of records
+ """
+ global import_error
+ err=[]
+ if import_error == 1:
+ err.append((6,imperr))
+ else:
+ if sys.version >= '2.3':
+ pat = r".*?"
+ p = re.compile(pat,re.DOTALL) # DOTALL - so that '.' also matches newlines
+ list = p.findall(xmltext)
+ else:
+ l = xmltext.split('')
+ n=len(l)
+ ind = (l[n-1]).rfind('')
+ aux = l[n-1][:ind+9]
+ l[n-1] = aux
+ list=[]
+ for s in l:
+ if s != '':
+ i = -1
+ while (s[i].isspace()):
+ i=i-1
+ if i == -1:#in case there are no spaces at the end
+ i=len(s)-1
+ if s[:i+1].endswith(''):
+ list.append(''+s)
+ listofrec = map((lambda x:create_record(x,verbose,correct)),list)
+ return listofrec
+ return []
-def create_record_4suite(xmltext):
+# Record :: {tag : [Field]}
+# Field :: (Subfields,ind1,ind2,value,ord)
+# Subfields :: [(code,value)]
+
+def create_record(xmltext,verbose = verbose, correct=correct):
"""
creates a record object and returns it
- uses 4Suite domlette
+ uses pyRXP if installed else uses 4Suite domlette or xml.dom.minidom
"""
- record = {}
- record["fields"] = []
- dom = NonvalidatingReader.parseString(xmltext,"urn:dummy")
- root = dom.childNodes[0]
- for controlfield in get_childs_by_tag_name(root,"controlfield"):
- record["fields"].append(create_field(controlfield.getAttributeNS(None,"tag"),get_string_value(controlfield)))
- for datafield in get_childs_by_tag_name(root,"datafield"):
- record["fields"].append(create_field(datafield.getAttributeNS(None,"tag"),"",datafield.getAttributeNS(None,"ind1"),datafield.getAttributeNS(None,"ind2")))
- myfield = record["fields"][len(record["fields"])-1]
- for subfield in get_childs_by_tag_name(datafield,"subfield"):
- field_add_subfield(myfield,subfield.getAttributeNS(None,"code"),get_string_value(subfield))
- return record
+ global parser
-def record_order_fields(rec,fun="order_by_tag"):
- """orders field inside record 'rec' according to a function"""
- rec["fields"].sort(eval(fun))
- return
+ (i,errors) = testImports(parser)
-def record_order_subfields(rec,fun="order_by_code"):
- """orders subfield inside record 'rec' according to a function"""
- for field in rec['fields']:
- field['subfields'].sort(eval(fun))
- return
+ if i==0:
+ return (None,0,errors)
+
+ try:
+ if parser==2:
+ ## the following is because of DTD validation
+ t = """
+
+ \n""" % cfg_marc21_dtd
+ t = "%s%s" % (t,xmltext)
+ t = "%s" % t
+ xmltext = t
+ (rec,er) = create_record_RXP(xmltext,verbose,correct)
+ elif parser:
+ (rec,er) = create_record_4suite(xmltext,verbose,correct)
+ else:
+ (rec,er) = create_record_minidom(xmltext,verbose,correct)
+
+ errs = warnings(er)
+ except:
+ errs = warnings(concat(err))
+ return (None,0,errs)
+ if errs == []:
+ return (rec,1,errs)
+ else:
+ return (rec,0,errs)
+
+
+
def record_get_field_instances(rec):
"""returns the list of fields stored in record 'rec'"""
- return rec["fields"]
+ return rec.items()
+
def record_has_field(rec,tag):
"""checks whether record 'rec' contains tag 'tag'"""
- for field in rec["fields"]:
- if field["tag"] == tag:
- return 1
- return 0
+ return rec.has_key(tag)
def record_add_field(rec,tag,value,ind1="",ind2=""):
"""
adds new field defined by the tag|value|ind1|ind2 parameters to record 'rec'
returns the new field
"""
- newfield = create_field(tag,value,ind1,ind2)
- rec["fields"].append(newfield)
- return newfield
+
+ val=rec.values()
+ if val != []:
+ ord = max([f[4] for x in val for f in x])
+ else:
+ ord = 1
+ newfield = create_field(value,ind1,ind2,[],ord)
+
+ if rec.has_key(tag):
+ rec[tag].append(newfield)
+ else:
+ rec[tag] = [newfield]
+
+ return newfield
def record_delete_field(rec,tag,ind1="",ind2=""):
"""
delete all fields defined with marc tag 'tag' and indicators 'ind1' and 'ind2'
from record 'rec'
"""
newlist = []
- for field in rec["fields"]:
- if not (field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2):
- newlist.append(field)
- rec["fields"] = newlist
+ if rec.has_key(tag):
+ for field in rec[tag]:
+ if not (field[1]==ind1 and field[2]==ind2):
+ newlist.append(field)
+ rec[tag] = newlist
def record_get_field_value(rec,tag,ind1="",ind2="",code=""):
"""
retrieves the value of the first field containing tag 'tag' and indicators 'ind1' and 'ind2'
inside record 'rec'. Returns the found value as a string. If no matching field is found
returns the empty string.
+ if the tag has a '%', it will retrieve the value of the first field whose tag starts with the characters before the '%'. The ind1, ind2 and code parameters will be ignored
"""
- for field in rec["fields"]:
- if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2:
- if field["value"] != "":
- return field["value"]
- else:
- for subfield in field["subfields"]:
- if subfield["code"]==code:
- return subfield["value"]
+
+ s = tag.split('%')
+ if len(s) > 1:
+ t = s[0]
+ keys=rec.keys()
+ tags=[k for k in keys if k.startswith(t)]
+ for tag in tags:
+ fields = rec[tag]
+ for field in fields:
+ if field[3] != "":
+ return field[3]
+ else:
+ for subfield in field[0]:
+ return subfield[1]
+ else:
+ if rec.has_key(tag):
+ fields = rec[tag]
+ for field in fields:
+ if field[1]==ind1 and field[2]==ind2:
+ if field[3] != "":
+ return field[3]
+ else:
+ for subfield in field[0]:
+ if subfield[0]==code:
+ return subfield[1]
+
return ""
def record_get_field_values(rec,tag,ind1="",ind2="",code=""):
"""
retrieves the values of all the fields containing tag 'tag' and indicators 'ind1' and 'ind2'
inside record 'rec'. Returns the found values as a list. If no matching field is found
returns an empty list.
+ if the tag has a '%', it will retrieve the values of all fields whose tag starts with the characters before the '%'. The ind1, ind2 and code parameters will be ignored
"""
tmp = []
- for field in rec["fields"]:
- if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2:
- if field["value"] != "":
- tmp.append(field["value"])
- else:
- for subfield in field["subfields"]:
- if subfield["code"]==code:
- tmp.append(subfield["value"])
+
+ s = tag.split('%')
+ if len(s) > 1:
+ t = s[0]
+ keys=rec.keys()
+ tags=[k for k in keys if k.startswith(t)]
+ for tag in tags:
+ fields = rec[tag]
+ for field in fields:
+ if field[3] != "":
+ tmp.append(field[3])
+ else:
+ for subfield in field[0]:
+ tmp.append(subfield[1])
+ else:
+ if rec.has_key(tag):
+ fields = rec[tag]
+ for field in fields:
+ if field[1]==ind1 and field[2]==ind2:
+ if field[3] != "":
+ tmp.append(field[3])
+ else:
+ for subfield in field[0]:
+ if subfield[0]==code:
+ tmp.append(subfield[1])
+
return tmp
+def print_rec(rec,format=1):
+ """prints a record
+ format = 1 -- XML
+ format = 2 -- HTML (not implemented)
+ """
+
+ if format==1:
+ text = record_xml_output(rec)
+ else:
+ return ''
+
+ return text
+
+def print_recs(listofrec,format=1):
+ """prints a list of records
+ format = 1 -- XML
+ format = 2 -- HTML (not implemented)
+ if 'listofrec' is not a list it returns empty string
+ """
+ text = ""
+
+ if type(listofrec).__name__ !='list':
+ return ""
+ else:
+ for rec in listofrec:
+ text = "%s\n%s" % (text,print_rec(rec,format))
+ return text
+
def record_xml_output(rec):
"""generates the XML for record 'rec' and returns it as a string"""
xmltext = "\n"
- record_order_fields(rec)
- for field in rec["fields"]:
- xmltext = "%s%s" % (xmltext,field_xml_output(field))
+ #add the tag 'tag' to each field in rec[tag]
+ fields=[]
+ for tag in rec.keys():
+ for field in rec[tag]:
+ fields.append((tag,field))
+
+ record_order_fields(fields)
+
+ for field in fields:
+ xmltext = "%s%s" % (xmltext,field_xml_output(field[1],field[0]))#field[0]=tag
xmltext = "%s" % xmltext
return xmltext
+
+def records_xml_output(listofrec):
+ """generates the XML for the list of records 'listofrec' and returns it as a string"""
+ xmltext = """
+
+ \n""" % cfg_marc21_dtd
+
+ for rec in listofrec:
+ xmltext = "%s%s" % (xmltext, record_xml_output(rec))
+ xmltext = "%s" % xmltext
+ return xmltext
+
+def field_get_subfield_instances(field):
+ """returns the list of subfields associated with field 'field'"""
+ return field[0]
+
+def field_add_subfield(field,code,value):
+ """adds a subfield to field 'field'"""
+ field[0].append(create_subfield(code,value))
+
+
+### IMPLEMENTATION / INVISIBLE FUNCTIONS
+
+def create_record_RXP(xmltext, verbose=verbose, correct=correct):
+ """
+ creates a record object and returns it
+ uses the RXP parser
+
+ If verbose>3 then the parser will be strict and will stop in case of well-formedness errors
+ or DTD errors
+ If verbose=0, the parser will not give warnings
+ If 0 < verbose <= 3, the parser will not stop on DTD validity errors
+ correct != 0 -> We will try to correct errors such as missing attributes
+ correct = 0 -> there will not be any attempt to correct errors
+
+ """
+
+ record = {}
+ global err
+
+ ord = 1 # this is needed because of the record_xml_output function, where we need to know
+ # the order of the fields
+
+
+ TAG, ATTRS,CHILD_LIST = range(3)
+
+ if verbose > 3:
+ p = pyRXP.Parser(ErrorOnValidityErrors=1,
+ ProcessDTD=1,
+ ErrorOnUnquotedAttributeValues=1,
+ warnCB = warnCB,
+ srcName='string input')
+ else:
+ p = pyRXP.Parser(ErrorOnValidityErrors=0,
+ ProcessDTD=1,
+ ErrorOnUnquotedAttributeValues=0,
+ warnCB = warnCB,
+ srcName='string input')
+
+
+ if correct:
+ (rec,e) = wash(xmltext)
+ err.extend(e)
+ return (rec,e)
+
+
+ root1=p(xmltext) #root = (tagname, attr_dict, child_list, reserved)
+
+ if root1[0]=='collection':
+ recs = [t for t in root1[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="record"]
+ if recs !=[]:
+ root = recs[0]
+ else:
+ root = None
+ else:
+ root=root1
+
+
+
+ # get childs of 'controlfield'
+ childs_controlfield = []
+ if not root[2]==None:
+ childs_controlfield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="controlfield"]
+
+ # get childs of 'datafield'
+ childs_datafield = []
+ if not root[CHILD_LIST]==None:
+ childs_datafield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="datafield"]
+
+ for controlfield in childs_controlfield:
+ s=controlfield[ATTRS]["tag"]
+ value=''
+ if not controlfield==None:
+ value=''.join([ n for n in controlfield[CHILD_LIST] if type(n).__name__ == 'str'])
+
+ name = type(value).__name__
+
+ if name in ["int","long"] :
+ st = str(value)
+ elif name == 'str':
+ st = value
+ else:
+ if verbose:
+ err.append((7,'Type found: ' + name))
+ st = "" # the type of value is not correct. (user insert something like a list...)
+
+
+ field = ([],"","",st,ord) #field = (subfields, ind1, ind2,value,ord)
+
+ if record.has_key(s):
+ record[s].append(field)
+ else:
+ record[s]=[field]
+
+ ord = ord+1
+
+ for datafield in childs_datafield:
+
+ #create list of subfields
+ subfields = []
+
+ childs_subfield = []
+ if not datafield[CHILD_LIST]==None:
+ childs_subfield =[t for t in datafield[CHILD_LIST] if type(t).__name__=='tuple' and t[0]=="subfield"]
+
+ for subfield in childs_subfield:
+ value=''
+ if not subfield==None:
+ value=''.join([ n for n in subfield[CHILD_LIST] if type(n).__name__ == 'str'])
+ #get_string_value(subfield)
+ if subfield[ATTRS].has_key('code'):
+ subfields.append((subfield[ATTRS]["code"],value))
+ else:
+ subfields.append(('!',value))
+
+ #create field
+
+ if datafield[ATTRS].has_key('tag'):
+ s = datafield[ATTRS]["tag"]
+ else:
+ s = '!'
+
+ if datafield[ATTRS].has_key('ind1'):
+ ind1 = datafield[ATTRS]["ind1"]
+ else:
+ ind1 = '!'
+
+ if datafield[ATTRS].has_key('ind2'):
+ ind2 = datafield[ATTRS]["ind2"]
+ else:
+ ind2 = '!'
+
+ field = (subfields,ind1,ind2,"",ord)
+
+ if record.has_key(s):
+ record[s].append(field)
+ else:
+ record[s]=[field]
+
+ ord = ord+1
+
+ return (record,err)
+
+
-def create_field(tag,value,ind1="",ind2=""):
+def create_record_minidom(xmltext, verbose=verbose, correct=correct):
+ """
+ creates a record object and returns it
+ uses xml.dom.minidom
+ """
+
+ record = {}
+ ord=1
+ global err
+
+ if correct:
+ xmlt = xmltext
+ (rec,e) = wash(xmlt,0)
+ err.append(e)
+ return (rec,err)
+
+ dom = parseString(xmltext)
+ root = dom.childNodes[0]
+
+ for controlfield in get_childs_by_tag_name(root,"controlfield"):
+ s = controlfield.getAttribute("tag")
+
+ text_nodes = controlfield.childNodes
+ v = u''.join([ n.data for n in text_nodes ])
+
+ name = type(v).__name__
+ if (name in ["int","long"]) :
+ field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value)
+ elif name == 'str':
+ field = ([],"","",v,ord)
+ else:
+ if verbose:
+ err.append((7,'Type found: ' + name))
+
+ field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...)
+
+ if record.has_key(s):
+ record[s].append(field)
+ else:
+ record[s]=[field]
+ ord=ord+1
+
+ for datafield in get_childs_by_tag_name(root,"datafield"):
+ subfields = []
+
+ for subfield in get_childs_by_tag_name(datafield,"subfield"):
+ text_nodes = subfield.childNodes
+ v = u''.join([ n.data for n in text_nodes ])
+ code = subfield.getAttributeNS(None,'code')
+ if code != '':
+ subfields.append((code,v))
+ else:
+ subfields.append(('!',v))
+
+ s = datafield.getAttribute("tag")
+ if s == '':
+ s = '!'
+
+ ind1 = datafield.getAttribute("ind1")
+
+ ind2 = datafield.getAttribute("ind2")
+
+
+ if record.has_key(s):
+ record[s].append((subfields,ind1,ind2,"",ord))
+ else:
+ record[s]=[(subfields,ind1,ind2,"",ord)]
+ ord=ord+1
+
+ return (record,err)
+
+
+def create_record_4suite(xmltext,verbose=verbose,correct=correct):
+ """
+ creates a record object and returns it
+ uses 4Suite domlette
+ """
+ record = {}
+ global err
+
+ if correct:
+ xmlt = xmltext
+ (rec,e) = wash(xmlt,1)
+ err.append(e)
+ return (rec,e)
+
+ dom = NonvalidatingReader.parseString(xmltext,"urn:dummy")
+
+ root = dom.childNodes[0]
+
+ ord=1
+
+ for controlfield in get_childs_by_tag_name(root,"controlfield"):
+ s = controlfield.getAttributeNS(None,"tag")
+
+ text_nodes = controlfield.childNodes
+ v = u''.join([ n.data for n in text_nodes ])
+
+ name = type(v).__name__
+ if (name in ["int","long"]) :
+ field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value)
+ elif name == 'str':
+ field = ([],"","",v,ord)
+ else:
+ if verbose:
+ err.append((7,'Type found: ' + name))
+
+ field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...)
+
+
+ if record.has_key(s):
+ record[s].append(field)
+ else:
+ record[s]=[field]
+ ord=ord+1
+
+
+ for datafield in get_childs_by_tag_name(root,"datafield"):
+ subfields = []
+
+ for subfield in get_childs_by_tag_name(datafield,"subfield"):
+ text_nodes = subfield.childNodes
+ v = u''.join([ n.data for n in text_nodes ])
+
+ code = subfield.getAttributeNS(None,'code')
+ if code != '':
+ subfields.append((code,v))
+ else:
+ subfields.append(('!',v))
+
+ s = datafield.getAttributeNS(None,"tag")
+ if s == '':
+ s = '!'
+
+ ind1 = datafield.getAttributeNS(None,"ind1")
+
+ ind2 = datafield.getAttributeNS(None,"ind2")
+
+
+ if record.has_key(s):
+ record[s].append((subfields,ind1,ind2,"",ord))
+ else:
+ record[s]=[(subfields,ind1,ind2,"",ord)]
+ ord=ord+1
+
+ return (record,err)
+
+
+
+
+def record_order_fields(rec,fun="order_by_ord"):
+ """orders field inside record 'rec' according to a function"""
+ rec.sort(eval(fun))
+ return
+
+def record_order_subfields(rec,fun="order_by_code"):
+ """orders subfield inside record 'rec' according to a function"""
+ for tag in rec:
+ for field in rec[tag]:
+ field[0].sort(eval(fun))
+ return
+
+
+
+def concat(list):
+ """concats a list of lists"""
+ newl = []
+ for l in list:
+ newl.extend(l)
+ return newl
+
+
+
+
+def create_field(value,ind1="",ind2="",subfields=[],ord=-1):
""" creates a field object and returns it"""
- field = {}
- field["subfields"] = []
- field["tag"] = tag
- field["ind1"] = ind1
- field["ind2"] = ind2
- if type(value).__name__ in ["int","long"] :
- field["value"] = str(value)
+
+ name = type(value).__name__
+ if name in ["int","long"] :
+ s = str(value)
+ elif name == 'str':
+ s = value
else:
- field["value"] = value
- return field
+ err.append((7,'Type found: ' + name))
+ s=""
+
-def field_get_subfield_instances(field):
- """returns the list of subfields associated with field 'field'"""
- return field["subfields"]
+ field = (subfields,ind1,ind2,s,ord)
+ return field
+
def field_add_subfield(field,code,value):
"""adds a subfield to field 'field'"""
- field["subfields"].append(create_subfield(code,value))
+ field[0].append(create_subfield(code,value))
-def field_xml_output(field):
+def field_xml_output(field,tag):
"""generates the XML for field 'field' and returns it as a string"""
xmltext = ""
- if field["value"] != "":
- xmltext = "%s %s\n" % (xmltext,field["tag"],escape(field["value"]))
+ if field[3] != "":
+ xmltext = "%s %s\n" % (xmltext,tag,encode_for_xml(field[3]))
else:
- xmltext = "%s \n" % (xmltext,field["tag"],field["ind1"],field["ind2"])
- for subfield in field["subfields"]:
+ xmltext = "%s \n" % (xmltext,tag,field[1],field[2])
+ for subfield in field[0]:
xmltext = "%s%s" % (xmltext,subfield_xml_output(subfield))
xmltext = "%s \n" % xmltext
return xmltext
def create_subfield(code,value):
""" creates a subfield object and returns it"""
- subfield = {}
- subfield["code"] = code
if type(value).__name__ in ["int","long"]:
- subfield["value"] = str(value)
+ s = str(value)
else:
- subfield["value"] = value
+ s = value
+ subfield = (code, s)
+
return subfield
def subfield_xml_output(subfield):
"""generates the XML for a subfield object and return it as a string"""
- xmltext = " %s\n" % (subfield["code"],escape(subfield["value"]))
+ xmltext = " %s\n" % (subfield[0],encode_for_xml(subfield[1]))
return xmltext
-def order_by_tag(field1, field2):
- """function used to order the fields according to their tag value"""
- return cmp(field1["tag"], field2["tag"])
+def order_by_ord(field1, field2):
+ """function used to order the fields according to their ord value"""
+ return cmp(field1[1][4], field2[1][4])
def order_by_code(subfield1,subfield2):
"""function used to order the subfields according to their code value"""
- return cmp(subfield1['code'],subfield2['code'])
+ return cmp(subfield1[0],subfield2[0])
def get_childs_by_tag_name(node, local):
"""retrieves all childs from node 'node' with name 'local' and returns them as a list"""
- res=[]
- for child in node.childNodes:
- if child.localName == local:
- res.append(child)
+ cNodes = list(node.childNodes)
+ res = [child for child in cNodes if child.nodeName==local]
return res
def get_string_value(node):
"""gets all child text nodes of node 'node' and returns them as a unicode string"""
text_nodes = node.childNodes
return u''.join([ n.data for n in text_nodes ])
-def db_get_xml_record(recid,format='xm'):
- """extracts record #newid xml representation from the db and returns it as a string"""
- res = run_sql("SELECT value FROM bibfmt WHERE format=%s and id_bibrec=%s",(format,recid,))
- if len(res) > 0:
- return decompress(res[0][0])
- return ""
+
+
+def get_childs_by_tag_name_RXP(listofchilds,tag):
+ """retrieves all childs from 'listofchilds' with tag name 'tag' and returns them as a list.
+ listofchilds is a list returned by the RXP parser
+ """
+ l=[]
+ if not listofchilds==None:
+ l =[t for t in listofchilds if type(t).__name__=='tuple' and t[0]==tag]
+
+ return l
+
+
+def getAttribute_RXP(root, attr):
+ """ returns the attribute 'attr' from root 'root'
+ root is a node returned by RXP parser
+ """
+ try:
+ return u''.join(root[1][attr])
+ except KeyError,e:
+ return ""
+
+
+def get_string_value_RXP(node):
+ """gets all child text nodes of node 'node' and returns them as a unicode string"""
+ if not node==None:
+ return ''.join([ n for n in node[2] if type(n).__name__ == 'str'])
+ else:
+ return ""
+
+
+def encode_for_xml(s):
+ "Encode special chars in string so that it would be XML-compliant."
+ s = string.replace(s, '&', '&')
+ s = string.replace(s, '<', '<')
+ s = string.replace(s, '>', '>')
+ return s
+
+
+def print_errors(list):
+ """ creates a single string from the strings in list, each one preceded by '\n' """
+ text=""
+
+ for l in list:
+ text = '%s\n%s'% (text,l)
+
+ return text
+
+
+
+def wash(xmltext, parser=2):
+ """
+ Check the structure of the xmltext. Returns a record structure and a list of errors.
+ parser = 1 - 4_suite
+ parser = 2 - pyRXP
+ parser = 0 - minidom
+ """
+
+ errors=[]
+ i,e1 = tagclose('datafield',xmltext)
+ j,e2 = tagclose('controlfield',xmltext)
+ k,e3 = tagclose('subfield',xmltext)
+ w,e4 = tagclose('record',xmltext)
+ errors.extend(e1)
+ errors.extend(e2)
+ errors.extend(e3)
+ errors.extend(e4)
+
+ if i and j and k and w and parser!=-3:
+ if parser==1:
+ (rec,ee) = create_record_4suite(xmltext,0,0)
+ elif parser==2:
+ (rec,ee) = create_record_RXP(xmltext,0,0)
+ else:
+ (rec,ee) = create_record_minidom(xmltext,0,0)
+ else:
+ return (None,errors)
-
\ No newline at end of file
+
+
+ keys = rec.keys()
+
+ for tag in keys:
+ upper_bound = '999'
+ n = len(tag)
+
+ if n>3:
+ i=n-3
+ while i>0:
+ upper_bound = '%s%s' % ('0',upper_bound)
+ i = i-1
+
+ if tag == '!': # missing tag
+ errors.append((1, '(field number(s): ' + ([f[4] for f in rec[tag]]).__str__()+')'))
+ v=rec[tag]
+ rec.__delitem__(tag)
+ rec['000'] = v
+ tag = '000'
+ elif not ("001" <= tag <=upper_bound):
+ errors.append(2)
+ v = rec[tag]
+ rec.__delitem__(tag)
+ rec['000'] = v
+ tag = '000'
+
+ fields =[]
+ for field in rec[tag]:
+ if field[0]==[] and field[3]=='': ## datafield without any subfield
+ errors.append((8,'(field number: '+field[4].__str__()+')'))
+
+ subfields=[]
+ for subfield in field[0]:
+ if subfield[0]=='!':
+ errors.append((3,'(field number: '+field[4].__str__()+')'))
+ newsub = ('',subfield[1])
+ else:
+ newsub = subfield
+ subfields.append(newsub)
+
+ if field[1]=='!':
+ errors.append((4,'(field number: '+field[4].__str__()+')'))
+ ind1 = ""
+ else:
+ ind1 = field[1]
+
+ if field[2]=='!':
+ errors.append((5,'(field number: '+field[4].__str__()+')'))
+ ind2 = ""
+ else:
+ ind2=field[2]
+
+ newf = (subfields,ind1,ind2,field[3],field[4])
+ fields.append(newf)
+
+ rec[tag]=fields
+
+ return (rec,errors)
+
+
+def tagclose(tagname,xmltext):
+ """ checks if an XML document does not have any missing tag with name tagname
+ """
+ import re
+ errors=[]
+ pat_open = '<'+tagname+'.*?>'
+ pat_close = ''+tagname+'>'
+ p_open = re.compile(pat_open,re.DOTALL) # DOTALL - so that '.' also matches newlines inside a tag
+ p_close = re.compile(pat_close,re.DOTALL)
+ list1 = p_open.findall(xmltext)
+ list2 = p_close.findall(xmltext)
+
+ if len(list1)!=len(list2):
+ errors.append((99,'(Tagname : ' + tagname + ')'))
+ return (0,errors)
+ else:
+ return (1,errors)
+
+
+
+def testImports(c):
+ """ Test if the import statements did not fail"""
+ errors=[]
+ global err1,err2
+
+ if c==-1:
+ i = 0
+ errors.append((6,err2))
+ elif c == -3:
+ i=0
+ errors.append((6,err1))
+ else:
+ i=1
+ return (i,errors)
+
+
+def warning(code):
+ """ It returns a warning message of code 'code'.
+ If code = (cd, str) it returns the warning message of code 'cd'
+ and appends str at the end"""
+
+ ws = cfg_bibrecord_warning_msgs
+ s=''
+
+ if type(code).__name__ == 'str':
+ return code
+
+ if type(code).__name__ == 'tuple':
+ if type(code[1]).__name__ == 'str':
+ s = code[1]
+ c = code[0]
+ else:
+ c = code
+ if ws.has_key(c):
+ return ws[c]+s
+ else:
+ return ""
+
+def warnings(l):
+ """it applies the function warning to every element in l"""
+ list = []
+ for w in l:
+ list.append(warning(w))
+ return list
+
diff --git a/modules/bibedit/lib/bibrecord_config.py b/modules/bibedit/lib/bibrecord_config.py
new file mode 100644
index 000000000..ff4f2ffe0
--- /dev/null
+++ b/modules/bibedit/lib/bibrecord_config.py
@@ -0,0 +1,46 @@
+## $Id$
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+#include "configbis.wml"
+
+### CONFIGURATION OPTIONS FOR BIBRECORD LIBRARY
+
+#0: Parse Warning
+"""Configuration for module bibrecord"""
+
+cfg_marc21_dtd = "/bibedit/MARC21slim.dtd"
+
+cfg_bibrecord_warning_msgs = {
+ 0: '' ,
+ 1: 'WARNING: tag missing for field(s)\nValue stored with tag \'000\'',
+ 2: 'WARNING: bad range for tags (tag must be in range 001-999)\nValue stored with tag \'000\'',
+ 3: 'WARNING: Missing atributte \'code\' for subfield\nValue stored with code \'\'',
+ 4: 'WARNING: Missing attributte \'ind1\'\n Value stored with ind1 = \'\'',
+ 5: 'WARNING: Missing attributte \'ind2\'\n Value stored with ind2 = \'\'',
+ 6: 'Import Error\n',
+ 7: 'WARNING: value expected of type string.',
+ 8: 'WARNING: empty datafield',
+ 98:'WARNING: problems importing cdsware',
+ 99: 'Document not well formed'
+ } # Dictionary of warning messages
+
+cfg_bibrecord_default_verbose_level=0 #Verbose level to be used in the creation of records
+cfg_bibrecord_default_correct=0 #Correct (0/1) to be used in the creation of records
+cfg_bibrecord_parsers_available = [0,1,2] #parsers available
+ # 0 - minidom; 1-is4suite; 2-pyRXP
diff --git a/modules/bibedit/lib/bibrecord_config.py.wml b/modules/bibedit/lib/bibrecord_config.py.wml
new file mode 100644
index 000000000..ff4f2ffe0
--- /dev/null
+++ b/modules/bibedit/lib/bibrecord_config.py.wml
@@ -0,0 +1,46 @@
+## $Id$
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+#include "configbis.wml"
+
+### CONFIGURATION OPTIONS FOR BIBRECORD LIBRARY
+
+#0: Parse Warning
+"""Configuration for module bibrecord"""
+
+cfg_marc21_dtd = "/bibedit/MARC21slim.dtd"
+
+cfg_bibrecord_warning_msgs = {
+ 0: '' ,
+ 1: 'WARNING: tag missing for field(s)\nValue stored with tag \'000\'',
+ 2: 'WARNING: bad range for tags (tag must be in range 001-999)\nValue stored with tag \'000\'',
+ 3: 'WARNING: Missing atributte \'code\' for subfield\nValue stored with code \'\'',
+ 4: 'WARNING: Missing attributte \'ind1\'\n Value stored with ind1 = \'\'',
+ 5: 'WARNING: Missing attributte \'ind2\'\n Value stored with ind2 = \'\'',
+ 6: 'Import Error\n',
+ 7: 'WARNING: value expected of type string.',
+ 8: 'WARNING: empty datafield',
+ 98:'WARNING: problems importing cdsware',
+ 99: 'Document not well formed'
+ } # Dictionary of warning messages
+
+cfg_bibrecord_default_verbose_level=0 #Verbose level to be used in the creation of records
+cfg_bibrecord_default_correct=0 #Correct (0/1) to be used in the creation of records
+cfg_bibrecord_parsers_available = [0,1,2] #parsers available
+ # 0 - minidom; 1-is4suite; 2-pyRXP
diff --git a/modules/bibedit/lib/bibrecord_tests.py b/modules/bibedit/lib/bibrecord_tests.py
new file mode 100644
index 000000000..f9c8a706c
--- /dev/null
+++ b/modules/bibedit/lib/bibrecord_tests.py
@@ -0,0 +1,201 @@
+## $Id$
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+# -*- coding: utf-8 -*-
+
+from config import tmpdir
+import bibrecord
+import unittest
+from string import expandtabs, replace
+
+class SanityTest(unittest.TestCase):
+
+### check for sanity -- xml -> create_record -> xml
+ def test_for_sanity(self):
+ """ bibrecord - checking for sanity """
+
+ f=open(tmpdir + '/demobibdata.xml','r')
+ xmltext = f.read()
+ f.close()
+ rs = bibrecord.create_records(xmltext)
+ recs = map((lambda x:x[0]),rs)
+ xmlT = bibrecord.records_xml_output(recs)
+ x = xmlT.replace('\n','')
+ y = xmltext.replace('\n','')
+ xx=expandtabs(x)
+ yy=expandtabs(y)
+ xxx = xx.replace(' ','')
+ yyy = yy.replace(' ','')
+ #self.assertEqual(xxx,yyy)
+
+### testing for success
+
+class SuccessTest(unittest.TestCase):
+ """ bibrecord - testing for success """
+ def setUp(self):
+ f=open(tmpdir + '/demobibdata.xml','r')
+ xmltext = f.read()
+ f.close()
+ self.recs = map((lambda x:x[0]),bibrecord.create_records(xmltext))
+
+ def test_records_created(self):
+ """ bibrecord - number of records created """
+ ## check if it creates every record (the file demobibdata.xml has 75 records)
+ self.assertEqual(75,len(self.recs))
+
+ def test_tags_created(self):
+ """ bibrecord - tags created """
+ ## check if the tags are correct
+ tags= ['020', '037', '041', '080', '088', '100', '245', '246', '250', '260', '270', '300', '340', '490', '500', '502', '520', '590', '595', '650', '653', '690', '700', '710', '856','909','980','999']
+ t=[]
+ for rec in self.recs:
+ t.extend(rec.keys())
+ t.sort()
+ #eliminate the elements repeated
+ tt = []
+ for x in t:
+ if not x in tt:
+ tt.append(x)
+ self.assertEqual(tags,tt)
+
+ def test_fields_created(self):
+ """bibrecord - fields created"""
+ ## check if the number of fields for each record is correct
+
+ fields=[13,13, 8, 11, 10,12, 10, 14, 10, 17, 13, 15, 10, 9, 14, 10, 11, 11, 11, 9, 10, 10, 10, 8, 8, 8, 9, 9, 9, 10, 8, 8, 8,8, 14, 13, 14, 14, 15, 12,12, 12,14, 13, 11, 15, 15, 14, 14, 13, 15, 14, 14, 14, 15, 14, 15, 14, 14, 15, 14, 13, 13, 14, 11, 13, 11, 14, 8, 10, 13, 12, 11, 12,6]
+
+ cr=[]
+ ret=[]
+ for rec in self.recs:
+ cr.append(len(rec.values()))
+ ret.append(rec)
+ self.assertEqual(fields,cr)
+
+class BadInputTreatmentTest(unittest.TestCase):
+ """ bibrecord - testing for bad input treatment """
+
+
+### check bad input treatment ###
+ def test_wrong_attribute(self):
+ """bibrecord - bad input : Has \'cde\' instead \'code\' in a subfield attribute"""
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error1 = """
+
+ 33
+
+ eng
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error1,1,1)
+ ee=''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[3])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((3,'(field number: 4)')),ee)
+
+ def test_missing_attribute(self):
+ """ bibrecord - bad input : Missing attribute \"tag\" """
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error2 = """
+
+ 33
+
+ eng
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error2,1,1)
+ ee=''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[1])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((1,'(field number(s): [2])')),ee)
+
+ def test_empty_datafield(self):
+ """ bibrecord - bad input : Datafield without any subfield """
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error3 = """
+
+ 33
+
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error3,1,1)
+ ee=''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[8])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((8,'(field number: 2)')),ee)
+
+
+ def test_missing_tag(self):
+ """bibrecord - bad input : Missing end \"tag\""""
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error4 = """
+
+ 33
+
+ eng
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error4,1,1)
+ ee = ''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[99])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((99,'(Tagname : datafield)')),ee)
+
+def create_test_suite():
+ """Return test suite for the bibrecord module"""
+ return unittest.TestSuite((unittest.makeSuite(SanityTest,'test'),
+ unittest.makeSuite(SuccessTest,'test'),
+ unittest.makeSuite(BadInputTreatmentTest,'test')))
+if __name__ == '__main__':
+ unittest.TextTestRunner(verbosity=2).run(create_test_suite())
+
diff --git a/modules/bibedit/lib/bibrecord_tests.py.wml b/modules/bibedit/lib/bibrecord_tests.py.wml
new file mode 100644
index 000000000..f9c8a706c
--- /dev/null
+++ b/modules/bibedit/lib/bibrecord_tests.py.wml
@@ -0,0 +1,201 @@
+## $Id$
+
+## This file is part of the CERN Document Server Software (CDSware).
+## Copyright (C) 2002 CERN.
+##
+## The CDSware is free software; you can redistribute it and/or
+## modify it under the terms of the GNU General Public License as
+## published by the Free Software Foundation; either version 2 of the
+## License, or (at your option) any later version.
+##
+## The CDSware is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with CDSware; if not, write to the Free Software Foundation, Inc.,
+## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
+
+# -*- coding: utf-8 -*-
+
+from config import tmpdir
+import bibrecord
+import unittest
+from string import expandtabs, replace
+
+class SanityTest(unittest.TestCase):
+
+### check for sanity -- xml -> create_record -> xml
+ def test_for_sanity(self):
+ """ bibrecord - checking for sanity """
+
+ f=open(tmpdir + '/demobibdata.xml','r')
+ xmltext = f.read()
+ f.close()
+ rs = bibrecord.create_records(xmltext)
+ recs = map((lambda x:x[0]),rs)
+ xmlT = bibrecord.records_xml_output(recs)
+ x = xmlT.replace('\n','')
+ y = xmltext.replace('\n','')
+ xx=expandtabs(x)
+ yy=expandtabs(y)
+ xxx = xx.replace(' ','')
+ yyy = yy.replace(' ','')
+ #self.assertEqual(xxx,yyy)
+
+### testing for success
+
+class SuccessTest(unittest.TestCase):
+ """ bibrecord - testing for success """
+ def setUp(self):
+ f=open(tmpdir + '/demobibdata.xml','r')
+ xmltext = f.read()
+ f.close()
+ self.recs = map((lambda x:x[0]),bibrecord.create_records(xmltext))
+
+ def test_records_created(self):
+ """ bibrecord - number of records created """
+ ## check if it creates every record (the file demobibdata.xml has 75 records)
+ self.assertEqual(75,len(self.recs))
+
+ def test_tags_created(self):
+ """ bibrecord - tags created """
+ ## check if the tags are correct
+ tags= ['020', '037', '041', '080', '088', '100', '245', '246', '250', '260', '270', '300', '340', '490', '500', '502', '520', '590', '595', '650', '653', '690', '700', '710', '856','909','980','999']
+ t=[]
+ for rec in self.recs:
+ t.extend(rec.keys())
+ t.sort()
+ #eliminate the elements repeated
+ tt = []
+ for x in t:
+ if not x in tt:
+ tt.append(x)
+ self.assertEqual(tags,tt)
+
+ def test_fields_created(self):
+ """bibrecord - fields created"""
+ ## check if the number of fields for each record is correct
+
+ fields=[13,13, 8, 11, 10,12, 10, 14, 10, 17, 13, 15, 10, 9, 14, 10, 11, 11, 11, 9, 10, 10, 10, 8, 8, 8, 9, 9, 9, 10, 8, 8, 8,8, 14, 13, 14, 14, 15, 12,12, 12,14, 13, 11, 15, 15, 14, 14, 13, 15, 14, 14, 14, 15, 14, 15, 14, 14, 15, 14, 13, 13, 14, 11, 13, 11, 14, 8, 10, 13, 12, 11, 12,6]
+
+ cr=[]
+ ret=[]
+ for rec in self.recs:
+ cr.append(len(rec.values()))
+ ret.append(rec)
+ self.assertEqual(fields,cr)
+
+class BadInputTreatmentTest(unittest.TestCase):
+ """ bibrecord - testing for bad input treatment """
+
+
+### check bad input treatment ###
+ def test_wrong_attribute(self):
+ """bibrecord - bad input : Has \'cde\' instead \'code\' in a subfield attribute"""
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error1 = """
+
+ 33
+
+ eng
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error1,1,1)
+ ee=''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[3])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((3,'(field number: 4)')),ee)
+
+ def test_missing_attribute(self):
+ """ bibrecord - bad input : Missing attribute \"tag\" """
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error2 = """
+
+ 33
+
+ eng
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error2,1,1)
+ ee=''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[1])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((1,'(field number(s): [2])')),ee)
+
+ def test_empty_datafield(self):
+ """ bibrecord - bad input : Datafield without any subfield """
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error3 = """
+
+ 33
+
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error3,1,1)
+ ee=''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[8])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((8,'(field number: 2)')),ee)
+
+
+ def test_missing_tag(self):
+ """bibrecord - bad input : Missing end \"tag\""""
+ ws = bibrecord.cfg_bibrecord_warning_msgs
+ xml_error4 = """
+
+ 33
+
+ eng
+
+
+ Doe, John
+
+
+ On the foo and bar
+
+ """
+ (rec,st,e) = bibrecord.create_record(xml_error4,1,1)
+ ee = ''
+ for i in e:
+ if type(i).__name__ == 'str':
+ if i.count(ws[99])>0:
+ ee = i
+ self.assertEqual(bibrecord.warning((99,'(Tagname : datafield)')),ee)
+
+def create_test_suite():
+ """Return test suite for the bibrecord module"""
+ return unittest.TestSuite((unittest.makeSuite(SanityTest,'test'),
+ unittest.makeSuite(SuccessTest,'test'),
+ unittest.makeSuite(BadInputTreatmentTest,'test')))
+if __name__ == '__main__':
+ unittest.TextTestRunner(verbosity=2).run(create_test_suite())
+