diff --git a/modules/bibedit/Makefile.am b/modules/bibedit/Makefile.am index 8a0968291..7f66ad19b 100644 --- a/modules/bibedit/Makefile.am +++ b/modules/bibedit/Makefile.am @@ -1,22 +1,22 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = doc lib +SUBDIRS = bin doc etc lib CLEANFILES = *~ \ No newline at end of file diff --git a/modules/bibedit/bin/.cvsignore b/modules/bibedit/bin/.cvsignore new file mode 100644 index 000000000..a573d515a --- /dev/null +++ b/modules/bibedit/bin/.cvsignore @@ -0,0 +1,6 @@ +Makefile +Makefile.in +z_* +*.O +*~ +xmlmarclint \ No newline at end of file diff --git a/modules/bibedit/lib/Makefile.am b/modules/bibedit/bin/Makefile.am similarity index 82% copy from modules/bibedit/lib/Makefile.am copy to modules/bibedit/bin/Makefile.am index e717cdbae..7809b963d 100644 --- a/modules/bibedit/lib/Makefile.am +++ b/modules/bibedit/bin/Makefile.am @@ -1,29 +1,29 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -pylibdir = $(libdir)/python/cdsware -pylib_DATA = bibrecord.py +bin_SCRIPTS=xmlmarclint FILESWML = $(wildcard $(srcdir)/*.wml) EXTRA_DIST = $(FILESWML:$(srcdir)/%=%) -CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc +CLEANFILES = $(bin_SCRIPTS) *~ *.tmp xmlmarclintc -%.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml +%: %.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml $(top_srcdir)/config/cdswmllib.wml $(WML) -o $@ $< + chmod u+x $@ diff --git a/modules/bibedit/bin/xmlmarclint.in b/modules/bibedit/bin/xmlmarclint.in new file mode 100644 index 000000000..b14e1c70d --- /dev/null +++ b/modules/bibedit/bin/xmlmarclint.in @@ -0,0 +1,130 @@ +## $Id$ +## BibIndxes bibliographic data, reference and fulltext indexing utility. + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +## read config variables: +#include "config.wml" +#include "configbis.wml" +#include "cdswmllib.wml" + +## start Python: +#! +# -*- coding: utf-8 -*- +## $Id$ +## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. + +""" +XML MARC lint - check your XML MARC files +""" + +import getopt +import string +import sys + +pylibdir = "/python" + +try: + import sys + sys.path.append('%s' % pylibdir) + from cdsware.bibrecord import * +except ImportError, e: + print "Error: %s" % e + import sys + sys.exit(1) + +cmdusage = """command usage: + %s [-v ] xmlfile + or + %s --help + """ % (sys.argv[0], sys.argv[0]) +helpmsg = cmdusage + +try: + opts,args=getopt.getopt(sys.argv[1:], "c:v:h:",["-help"]) +except getopt.GetoptError: + print cmdusage + sys.exit(2) + +badrecords = [] +listofrecs=[] +verbose= 0 + +if len(args)==1: + xmlfile = args[0] +elif len(args)==0: + if len(opts)==1: + if opts[0][0] in ['-help','-h']: + print helpmsg + else: + print cmdusage + sys.exit(2) +else: + print cmdusage + sys.exit(2) + +for opt in opts: + if not opt[0] in ['-v']: + print cmdusage + sys.exit(2) + + if opt[0] == '-v': + try: + verbose = string.atoi(opt[1]) + except ValueError: + print 'Verbose must be an integer' + sys.exit(2) + +global parser +try: + f = open(xmlfile,'r') + xmltext = f.read() + f.close() +except IOError: + print 'File not found\n Please check the name' + import sys + sys.exit(1) + +parser = parser + +listofrecs = create_records(xmltext,0,1) +badr = filter((lambda x: x[1]==0),listofrecs) +badrecords = map((lambda x:x[0]),badr) + + +s='' +e='' + +if verbose: + if verbose <=3: + e=print_errors(concat(map((lambda x:x[2]),listofrecs))) + else: + s=print_recs(badrecords) + e=print_errors(concat(map((lambda x:x[2]),listofrecs))) +else: + if badrecords !=[]: + print 'Bad records detected! For more information, set verbosity.' + sys.exit(1) + +if s!='' or e!='': + print s + print e + sys.exit(1) + + + diff --git a/modules/bibedit/bin/xmlmarclint.wml b/modules/bibedit/bin/xmlmarclint.wml new file mode 100644 index 000000000..b14e1c70d --- /dev/null +++ b/modules/bibedit/bin/xmlmarclint.wml @@ -0,0 +1,130 @@ +## $Id$ +## BibIndxes bibliographic data, reference and fulltext indexing utility. + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +## read config variables: +#include "config.wml" +#include "configbis.wml" +#include "cdswmllib.wml" + +## start Python: +#! +# -*- coding: utf-8 -*- +## $Id$ +## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. + +""" +XML MARC lint - check your XML MARC files +""" + +import getopt +import string +import sys + +pylibdir = "/python" + +try: + import sys + sys.path.append('%s' % pylibdir) + from cdsware.bibrecord import * +except ImportError, e: + print "Error: %s" % e + import sys + sys.exit(1) + +cmdusage = """command usage: + %s [-v ] xmlfile + or + %s --help + """ % (sys.argv[0], sys.argv[0]) +helpmsg = cmdusage + +try: + opts,args=getopt.getopt(sys.argv[1:], "c:v:h:",["-help"]) +except getopt.GetoptError: + print cmdusage + sys.exit(2) + +badrecords = [] +listofrecs=[] +verbose= 0 + +if len(args)==1: + xmlfile = args[0] +elif len(args)==0: + if len(opts)==1: + if opts[0][0] in ['-help','-h']: + print helpmsg + else: + print cmdusage + sys.exit(2) +else: + print cmdusage + sys.exit(2) + +for opt in opts: + if not opt[0] in ['-v']: + print cmdusage + sys.exit(2) + + if opt[0] == '-v': + try: + verbose = string.atoi(opt[1]) + except ValueError: + print 'Verbose must be an integer' + sys.exit(2) + +global parser +try: + f = open(xmlfile,'r') + xmltext = f.read() + f.close() +except IOError: + print 'File not found\n Please check the name' + import sys + sys.exit(1) + +parser = parser + +listofrecs = create_records(xmltext,0,1) +badr = filter((lambda x: x[1]==0),listofrecs) +badrecords = map((lambda x:x[0]),badr) + + +s='' +e='' + +if verbose: + if verbose <=3: + e=print_errors(concat(map((lambda x:x[2]),listofrecs))) + else: + s=print_recs(badrecords) + e=print_errors(concat(map((lambda x:x[2]),listofrecs))) +else: + if badrecords !=[]: + print 'Bad records detected! For more information, set verbosity.' + sys.exit(1) + +if s!='' or e!='': + print s + print e + sys.exit(1) + + + diff --git a/modules/bibedit/etc/.cvsignore b/modules/bibedit/etc/.cvsignore new file mode 100644 index 000000000..a3409fca7 --- /dev/null +++ b/modules/bibedit/etc/.cvsignore @@ -0,0 +1,5 @@ +Makefile +Makefile.in +z_* +*.O +*~ \ No newline at end of file diff --git a/modules/bibedit/etc/MARC21slim.dtd b/modules/bibedit/etc/MARC21slim.dtd new file mode 100644 index 000000000..5edb404b0 --- /dev/null +++ b/modules/bibedit/etc/MARC21slim.dtd @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + diff --git a/modules/bibedit/Makefile.am b/modules/bibedit/etc/Makefile.am similarity index 88% copy from modules/bibedit/Makefile.am copy to modules/bibedit/etc/Makefile.am index 8a0968291..12bc9c459 100644 --- a/modules/bibedit/Makefile.am +++ b/modules/bibedit/etc/Makefile.am @@ -1,22 +1,25 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -SUBDIRS = doc lib +etcdir = $(sysconfdir)/bibedit/ +etc_DATA = MARC21slim.dtd -CLEANFILES = *~ \ No newline at end of file +EXTRA_DIST = $(etc_DATA) + +CLEANFILES = *~ *.tmp \ No newline at end of file diff --git a/modules/bibedit/lib/Makefile.am b/modules/bibedit/lib/Makefile.am index e717cdbae..737bbbef7 100644 --- a/modules/bibedit/lib/Makefile.am +++ b/modules/bibedit/lib/Makefile.am @@ -1,29 +1,29 @@ ## $Id$ ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. pylibdir = $(libdir)/python/cdsware -pylib_DATA = bibrecord.py +pylib_DATA = bibrecord.py bibrecord_config.py bibrecord_tests.py FILESWML = $(wildcard $(srcdir)/*.wml) EXTRA_DIST = $(FILESWML:$(srcdir)/%=%) CLEANFILES = $(pylib_DATA) *~ *.tmp *.pyc %.py: %.py.wml $(top_srcdir)/config/config.wml $(top_builddir)/config/configbis.wml $(WML) -o $@ $< diff --git a/modules/bibedit/lib/bibrecord.py b/modules/bibedit/lib/bibrecord.py index 7b7e0e3a3..cf966f357 100644 --- a/modules/bibedit/lib/bibrecord.py +++ b/modules/bibedit/lib/bibrecord.py @@ -1,280 +1,966 @@ ## $Id$ -## + ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -## read config variables: -#include "config.wml" -#include "configbis.wml" -#include "cdswmllib.wml" - -## start Python: -#! # -*- coding: utf-8 -*- ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. + """ -BibRecord - xml data processing system for CDSware +BibRecord - XML MARC processing library for CDSware """ -## fill config variables: -pylibdir = "/python" - +### IMPORT INTERESTING MODULES AND XML PARSERS + ## import interesting modules: try: import sys - from xml.dom import Node - from xml.sax.saxutils import escape + import re from zlib import decompress + import_error = 0 except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) - + import_error = 1 + imperr = e + +## test available parsers: try: - from Ft.Xml.Domlette import NonvalidatingReader - is4Suite = 1 -except ImportError,e : - try: - from xml.dom.minidom import parseString - is4Suite = 0 - except ImportError,e : - print "Error: %s" % e - import sys - sys.exit(1) + import sys + import string + err=[] +except ImportError, e: + parser = -3 + err1 = e try: - sys.path.append('%s' % pylibdir) - from cdsware.config import * - from cdsware.dbquery import run_sql + from bibrecord_config import * + verbose = cfg_bibrecord_default_verbose_level + correct = cfg_bibrecord_default_correct + parsers = cfg_bibrecord_parsers_available except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) + parser = -2 + verbose = 0 + correct = 0 + parsers = [] -def create_record(xmltext): - """ - creates a record object and returns it - uses 4Suite domlette if installed else uses xml.dom.minidom - """ - global is4Suite - try: - if is4Suite: - rec = create_record_4suite(xmltext) +if parsers == []: + print 'No parser available' + sys.exit(2) +else: + j,i=1,1 + if 2 in parsers: + try: + import pyRXP + parser = 2 + ## function to show the pyRXP_parser warnings ## + def warnCB(s): + """ function used to treat the PyRXP parser warnings""" + global err + err.append((0,'Parse warning:\n'+s)) + + err2 = "" + except ImportError,e : + err2=e + i=0 + elif 1 in parsers: + try: + from Ft.Xml.Domlette import NonvalidatingReader + parser = 1 + except ImportError,e : + err2=e + j=0 + else: + try: + from xml.dom.minidom import parseString + parser = 0 + except ImportError,e : + err2=e + parser = -1 + + if not i: + if 1 in parsers: + try: + from Ft.Xml.Domlette import NonvalidatingReader + parser = 1 + except ImportError,e : + err2=e + j=0 + elif 0 in parsers: + try: + from xml.dom.minidom import parseString + parser = 0 + except ImportError,e : + err2=e + parser = -1 else: - rec = create_record_minidom(xmltext) - except: - return None - return rec + parser = -1 -def create_record_minidom(xmltext): - """ - creates a record object and returns it - uses xml.dom.minidom - """ - record = {} - record["fields"] = [] - dom = parseString(xmltext) - root = dom.childNodes[0] - for controlfield in get_childs_by_tag_name(root,"controlfield"): - record["fields"].append(create_field(controlfield.getAttribute("tag"),get_string_value(controlfield))) - for datafield in get_childs_by_tag_name(root,"datafield"): - record["fields"].append(create_field(datafield.getAttribute("tag"),"",datafield.getAttribute("ind1"),datafield.getAttribute("ind2"))) - myfield = record["fields"][len(record["fields"])-1] - for subfield in get_childs_by_tag_name(datafield,"subfield"): - field_add_subfield(myfield,subfield.getAttribute("code"),get_string_value(subfield)) - return record + if not j: + if 0 in parsers: + try: + from xml.dom.minidom import parseString + parser = 0 + except ImportError,e : + err2=e + parser = -1 + else: + parser = -1 + +### INTERFACE / VISIBLE FUNCTIONS + +def create_records(xmltext,verbose=verbose,correct=correct): + """ + creates a list of records + """ + global import_error + err=[] + if import_error == 1: + err.append((6,imperr)) + else: + if sys.version >= '2.3': + pat = r".*?" + p = re.compile(pat,re.DOTALL) # DOTALL - to ignore whitespaces + list = p.findall(xmltext) + else: + l = xmltext.split('') + n=len(l) + ind = (l[n-1]).rfind('') + aux = l[n-1][:ind+9] + l[n-1] = aux + list=[] + for s in l: + if s != '': + i = -1 + while (s[i].isspace()): + i=i-1 + if i == -1:#in case there are no spaces at the end + i=len(s)-1 + if s[:i+1].endswith(''): + list.append(''+s) + listofrec = map((lambda x:create_record(x,verbose,correct)),list) + return listofrec + return [] -def create_record_4suite(xmltext): +# Record :: {tag : [Field]} +# Field :: (Subfields,ind1,ind2,value) +# Subfields :: [(code,value)] + +def create_record(xmltext,verbose = verbose, correct=correct): """ creates a record object and returns it - uses 4Suite domlette + uses pyRXP if installed else uses 4Suite domlette or xml.dom.minidom """ - record = {} - record["fields"] = [] - dom = NonvalidatingReader.parseString(xmltext,"urn:dummy") - root = dom.childNodes[0] - for controlfield in get_childs_by_tag_name(root,"controlfield"): - record["fields"].append(create_field(controlfield.getAttributeNS(None,"tag"),get_string_value(controlfield))) - for datafield in get_childs_by_tag_name(root,"datafield"): - record["fields"].append(create_field(datafield.getAttributeNS(None,"tag"),"",datafield.getAttributeNS(None,"ind1"),datafield.getAttributeNS(None,"ind2"))) - myfield = record["fields"][len(record["fields"])-1] - for subfield in get_childs_by_tag_name(datafield,"subfield"): - field_add_subfield(myfield,subfield.getAttributeNS(None,"code"),get_string_value(subfield)) - return record + global parser -def record_order_fields(rec,fun="order_by_tag"): - """orders field inside record 'rec' according to a function""" - rec["fields"].sort(eval(fun)) - return + (i,errors) = testImports(parser) -def record_order_subfields(rec,fun="order_by_code"): - """orders subfield inside record 'rec' according to a function""" - for field in rec['fields']: - field['subfields'].sort(eval(fun)) - return + if i==0: + return (None,0,errors) + + try: + if parser==2: + ## the following is because of DTD validation + t = """ + + \n""" % cfg_marc21_dtd + t = "%s%s" % (t,xmltext) + t = "%s" % t + xmltext = t + (rec,er) = create_record_RXP(xmltext,verbose,correct) + elif parser: + (rec,er) = create_record_4suite(xmltext,verbose,correct) + else: + (rec,er) = create_record_minidom(xmltext,verbose,correct) + + errs = warnings(er) + except: + errs = warnings(concat(err)) + return (None,0,errs) + if errs == []: + return (rec,1,errs) + else: + return (rec,0,errs) + + + def record_get_field_instances(rec): """returns the list of fields stored in record 'rec'""" - return rec["fields"] + return rec.items() + def record_has_field(rec,tag): """checks whether record 'rec' contains tag 'tag'""" - for field in rec["fields"]: - if field["tag"] == tag: - return 1 - return 0 + return rec.has_key(tag) def record_add_field(rec,tag,value,ind1="",ind2=""): """ adds new field defined by the tag|value|ind1|ind2 parameters to record 'rec' returns the new field """ - newfield = create_field(tag,value,ind1,ind2) - rec["fields"].append(newfield) - return newfield + + val=rec.values() + if val != []: + ord = max([f[4] for x in val for f in x]) + else: + ord = 1 + newfield = create_field(value,ind1,ind2,[],ord) + + if rec.has_key(tag): + rec[tag].append(newfield) + else: + rec[tag] = [newfield] + + return newfield def record_delete_field(rec,tag,ind1="",ind2=""): """ delete all fields defined with marc tag 'tag' and indicators 'ind1' and 'ind2' from record 'rec' """ newlist = [] - for field in rec["fields"]: - if not (field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2): - newlist.append(field) - rec["fields"] = newlist + if rec.has_key(tag): + for field in rec[tag]: + if not (field[1]==ind1 and field[2]==ind2): + newlist.append(field) + rec[tag] = newlist def record_get_field_value(rec,tag,ind1="",ind2="",code=""): """ retrieves the value of the first field containing tag 'tag' and indicators 'ind1' and 'ind2' inside record 'rec'. Returns the found value as a string. If no matching field is found returns the empty string. + if the tag has a '%', it will retrieve the value of first field containg tag, which first characters are those before '%' in tag. The ind1, ind2 and code parameters will be ignored """ - for field in rec["fields"]: - if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2: - if field["value"] != "": - return field["value"] - else: - for subfield in field["subfields"]: - if subfield["code"]==code: - return subfield["value"] + + s = tag.split('%') + if len(s) > 1: + t = s[0] + keys=rec.keys() + tags=[k for k in keys if k.startswith(t)] + for tag in tags: + fields = rec[tag] + for field in fields: + if field[3] != "": + return field[3] + else: + for subfield in field[0]: + return subfield[1] + else: + if rec.has_key(tag): + fields = rec[tag] + for field in fields: + if field[1]==ind1 and field[2]==ind2: + if field[3] != "": + return field[3] + else: + for subfield in field[0]: + if subfield[0]==code: + return subfield[1] + return "" def record_get_field_values(rec,tag,ind1="",ind2="",code=""): """ retrieves the values of all the fields containing tag 'tag' and indicators 'ind1' and 'ind2' inside record 'rec'. Returns the found values as a list. If no matching field is found returns an empty list. + if the tag has a '%', it will retrieve the value of all fields containg tag, which first characters are those before '%' in tag. The ind1, ind2 and code parameters will be ignored """ tmp = [] - for field in rec["fields"]: - if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2: - if field["value"] != "": - tmp.append(field["value"]) - else: - for subfield in field["subfields"]: - if subfield["code"]==code: - tmp.append(subfield["value"]) + + s = tag.split('%') + if len(s) > 1: + t = s[0] + keys=rec.keys() + tags=[k for k in keys if k.startswith(t)] + for tag in tags: + fields = rec[tag] + for field in fields: + if field[3] != "": + tmp.append(field[3]) + else: + for subfield in field[0]: + tmp.append(subfield[1]) + else: + if rec.has_key(tag): + fields = rec[tag] + for field in fields: + if field[1]==ind1 and field[2]==ind2: + if field[3] != "": + tmp.append(field[3]) + else: + for subfield in field[0]: + if subfield[0]==code: + tmp.append(subfield[1]) + return tmp +def print_rec(rec,format=1): + """prints a record + format = 1 -- XML + format = 2 -- HTML (not implemented) + """ + + if format==1: + text = record_xml_output(rec) + else: + return '' + + return text + +def print_recs(listofrec,format=1): + """prints a list of records + format = 1 -- XML + format = 2 -- HTML (not implemented) + if 'listofrec' is not a list it returns empty string + """ + text = "" + + if type(listofrec).__name__ !='list': + return "" + else: + for rec in listofrec: + text = "%s\n%s" % (text,print_rec(rec,format)) + return text + def record_xml_output(rec): """generates the XML for record 'rec' and returns it as a string""" xmltext = "\n" - record_order_fields(rec) - for field in rec["fields"]: - xmltext = "%s%s" % (xmltext,field_xml_output(field)) + #add the tag 'tag' to each field in rec[tag] + fields=[] + for tag in rec.keys(): + for field in rec[tag]: + fields.append((tag,field)) + + record_order_fields(fields) + + for field in fields: + xmltext = "%s%s" % (xmltext,field_xml_output(field[1],field[0]))#field[0]=tag xmltext = "%s" % xmltext return xmltext + +def records_xml_output(listofrec): + """generates the XML for the list of records 'listofrec' and returns it as a string""" + xmltext = """ + + \n""" % cfg_marc21_dtd + + for rec in listofrec: + xmltext = "%s%s" % (xmltext, record_xml_output(rec)) + xmltext = "%s" % xmltext + return xmltext + +def field_get_subfield_instances(field): + """returns the list of subfields associated with field 'field'""" + return field[0] + +def field_add_subfield(field,code,value): + """adds a subfield to field 'field'""" + field[0].append(create_subfield(code,value)) + + +### IMPLEMENTATION / INVISIBLE FUNCTIONS + +def create_record_RXP(xmltext, verbose=verbose, correct=correct): + """ + creates a record object and returns it + uses the RXP parser + + If verbose>3 then the parser will be strict and will stop in case of well-formedness errors + or DTD errors + If verbose=0, the parser will not give warnings + If 0 We will try to correct errors such as missing attributtes + correct = 0 -> there will not be any attempt to correct errors + + """ + + record = {} + global err + + ord = 1 # this is needed because of the record_xml_output function, where we need to know + # the order of the fields + + + TAG, ATTRS,CHILD_LIST = range(3) + + if verbose > 3: + p = pyRXP.Parser(ErrorOnValidityErrors=1, + ProcessDTD=1, + ErrorOnUnquotedAttributeValues=1, + warnCB = warnCB, + srcName='string input') + else: + p = pyRXP.Parser(ErrorOnValidityErrors=0, + ProcessDTD=1, + ErrorOnUnquotedAttributeValues=0, + warnCB = warnCB, + srcName='string input') + + + if correct: + (rec,e) = wash(xmltext) + err.extend(e) + return (rec,e) + + + root1=p(xmltext) #root = (tagname, attr_dict, child_list, reserved) + + if root1[0]=='collection': + recs = [t for t in root1[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="record"] + if recs !=[]: + root = recs[0] + else: + root = None + else: + root=root1 + + + + # get childs of 'controlfield' + childs_controlfield = [] + if not root[2]==None: + childs_controlfield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="controlfield"] + + # get childs of 'datafield' + childs_datafield = [] + if not root[CHILD_LIST]==None: + childs_datafield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="datafield"] + + for controlfield in childs_controlfield: + s=controlfield[ATTRS]["tag"] + value='' + if not controlfield==None: + value=''.join([ n for n in controlfield[CHILD_LIST] if type(n).__name__ == 'str']) + + name = type(value).__name__ + + if name in ["int","long"] : + st = str(value) + elif name == 'str': + st = value + else: + if verbose: + err.append((7,'Type found: ' + name)) + st = "" # the type of value is not correct. (user insert something like a list...) + + + field = ([],"","",st,ord) #field = (subfields, ind1, ind2,value,ord) + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + + ord = ord+1 + + for datafield in childs_datafield: + + #create list of subfields + subfields = [] + + childs_subfield = [] + if not datafield[CHILD_LIST]==None: + childs_subfield =[t for t in datafield[CHILD_LIST] if type(t).__name__=='tuple' and t[0]=="subfield"] + + for subfield in childs_subfield: + value='' + if not subfield==None: + value=''.join([ n for n in subfield[CHILD_LIST] if type(n).__name__ == 'str']) + #get_string_value(subfield) + if subfield[ATTRS].has_key('code'): + subfields.append((subfield[ATTRS]["code"],value)) + else: + subfields.append(('!',value)) + + #create field + + if datafield[ATTRS].has_key('tag'): + s = datafield[ATTRS]["tag"] + else: + s = '!' + + if datafield[ATTRS].has_key('ind1'): + ind1 = datafield[ATTRS]["ind1"] + else: + ind1 = '!' + + if datafield[ATTRS].has_key('ind2'): + ind2 = datafield[ATTRS]["ind2"] + else: + ind2 = '!' + + field = (subfields,ind1,ind2,"",ord) + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + + ord = ord+1 + + return (record,err) + + -def create_field(tag,value,ind1="",ind2=""): +def create_record_minidom(xmltext, verbose=verbose, correct=correct): + """ + creates a record object and returns it + uses xml.dom.minidom + """ + + record = {} + ord=1 + global err + + if correct: + xmlt = xmltext + (rec,e) = wash(xmlt,0) + err.append(e) + return (rec,err) + + dom = parseString(xmltext) + root = dom.childNodes[0] + + for controlfield in get_childs_by_tag_name(root,"controlfield"): + s = controlfield.getAttribute("tag") + + text_nodes = controlfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + + name = type(v).__name__ + if (name in ["int","long"]) : + field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value) + elif name == 'str': + field = ([],"","",v,ord) + else: + if verbose: + err.append((7,'Type found: ' + name)) + + field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...) + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + ord=ord+1 + + for datafield in get_childs_by_tag_name(root,"datafield"): + subfields = [] + + for subfield in get_childs_by_tag_name(datafield,"subfield"): + text_nodes = subfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + code = subfield.getAttributeNS(None,'code') + if code != '': + subfields.append((code,v)) + else: + subfields.append(('!',v)) + + s = datafield.getAttribute("tag") + if s == '': + s = '!' + + ind1 = datafield.getAttribute("ind1") + + ind2 = datafield.getAttribute("ind2") + + + if record.has_key(s): + record[s].append((subfields,ind1,ind2,"",ord)) + else: + record[s]=[(subfields,ind1,ind2,"",ord)] + ord=ord+1 + + return (record,err) + + +def create_record_4suite(xmltext,verbose=verbose,correct=correct): + """ + creates a record object and returns it + uses 4Suite domlette + """ + record = {} + global err + + if correct: + xmlt = xmltext + (rec,e) = wash(xmlt,1) + err.append(e) + return (rec,e) + + dom = NonvalidatingReader.parseString(xmltext,"urn:dummy") + + root = dom.childNodes[0] + + ord=1 + + for controlfield in get_childs_by_tag_name(root,"controlfield"): + s = controlfield.getAttributeNS(None,"tag") + + text_nodes = controlfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + + name = type(v).__name__ + if (name in ["int","long"]) : + field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value) + elif name == 'str': + field = ([],"","",v,ord) + else: + if verbose: + err.append((7,'Type found: ' + name)) + + field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...) + + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + ord=ord+1 + + + for datafield in get_childs_by_tag_name(root,"datafield"): + subfields = [] + + for subfield in get_childs_by_tag_name(datafield,"subfield"): + text_nodes = subfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + + code = subfield.getAttributeNS(None,'code') + if code != '': + subfields.append((code,v)) + else: + subfields.append(('!',v)) + + s = datafield.getAttributeNS(None,"tag") + if s == '': + s = '!' + + ind1 = datafield.getAttributeNS(None,"ind1") + + ind2 = datafield.getAttributeNS(None,"ind2") + + + if record.has_key(s): + record[s].append((subfields,ind1,ind2,"",ord)) + else: + record[s]=[(subfields,ind1,ind2,"",ord)] + ord=ord+1 + + return (record,err) + + + + +def record_order_fields(rec,fun="order_by_ord"): + """orders field inside record 'rec' according to a function""" + rec.sort(eval(fun)) + return + +def record_order_subfields(rec,fun="order_by_code"): + """orders subfield inside record 'rec' according to a function""" + for tag in rec: + for field in rec[tag]: + field[0].sort(eval(fun)) + return + + + +def concat(list): + """concats a list of lists""" + newl = [] + for l in list: + newl.extend(l) + return newl + + + + +def create_field(value,ind1="",ind2="",subfields=[],ord=-1): """ creates a field object and returns it""" - field = {} - field["subfields"] = [] - field["tag"] = tag - field["ind1"] = ind1 - field["ind2"] = ind2 - if type(value).__name__ in ["int","long"] : - field["value"] = str(value) + + name = type(value).__name__ + if name in ["int","long"] : + s = str(value) + elif name == 'str': + s = value else: - field["value"] = value - return field + err.append((7,'Type found: ' + name)) + s="" + -def field_get_subfield_instances(field): - """returns the list of subfields associated with field 'field'""" - return field["subfields"] + field = (subfields,ind1,ind2,s,ord) + return field + def field_add_subfield(field,code,value): """adds a subfield to field 'field'""" - field["subfields"].append(create_subfield(code,value)) + field[0].append(create_subfield(code,value)) -def field_xml_output(field): +def field_xml_output(field,tag): """generates the XML for field 'field' and returns it as a string""" xmltext = "" - if field["value"] != "": - xmltext = "%s %s\n" % (xmltext,field["tag"],escape(field["value"])) + if field[3] != "": + xmltext = "%s %s\n" % (xmltext,tag,encode_for_xml(field[3])) else: - xmltext = "%s \n" % (xmltext,field["tag"],field["ind1"],field["ind2"]) - for subfield in field["subfields"]: + xmltext = "%s \n" % (xmltext,tag,field[1],field[2]) + for subfield in field[0]: xmltext = "%s%s" % (xmltext,subfield_xml_output(subfield)) xmltext = "%s \n" % xmltext return xmltext def create_subfield(code,value): """ creates a subfield object and returns it""" - subfield = {} - subfield["code"] = code if type(value).__name__ in ["int","long"]: - subfield["value"] = str(value) + s = str(value) else: - subfield["value"] = value + s = value + subfield = (code, s) + return subfield def subfield_xml_output(subfield): """generates the XML for a subfield object and return it as a string""" - xmltext = " %s\n" % (subfield["code"],escape(subfield["value"])) + xmltext = " %s\n" % (subfield[0],encode_for_xml(subfield[1])) return xmltext -def order_by_tag(field1, field2): - """function used to order the fields according to their tag value""" - return cmp(field1["tag"], field2["tag"]) +def order_by_ord(field1, field2): + """function used to order the fields according to their ord value""" + return cmp(field1[1][4], field2[1][4]) def order_by_code(subfield1,subfield2): """function used to order the subfields according to their code value""" - return cmp(subfield1['code'],subfield2['code']) + return cmp(subfield1[0],subfield2[0]) def get_childs_by_tag_name(node, local): """retrieves all childs from node 'node' with name 'local' and returns them as a list""" - res=[] - for child in node.childNodes: - if child.localName == local: - res.append(child) + cNodes = list(node.childNodes) + res = [child for child in cNodes if child.nodeName==local] return res def get_string_value(node): """gets all child text nodes of node 'node' and returns them as a unicode string""" text_nodes = node.childNodes return u''.join([ n.data for n in text_nodes ]) -def db_get_xml_record(recid,format='xm'): - """extracts record #newid xml representation from the db and returns it as a string""" - res = run_sql("SELECT value FROM bibfmt WHERE format=%s and id_bibrec=%s",(format,recid,)) - if len(res) > 0: - return decompress(res[0][0]) - return "" + + +def get_childs_by_tag_name_RXP(listofchilds,tag): + """retrieves all childs from 'listofchilds' with tag name 'tag' and returns them as a list. + listofchilds is a list returned by the RXP parser + """ + l=[] + if not listofchilds==None: + l =[t for t in listofchilds if type(t).__name__=='tuple' and t[0]==tag] + + return l + + +def getAttribute_RXP(root, attr): + """ returns the attributte 'attr' from root 'root' + root is a node returned by RXP parser + """ + try: + return u''.join(root[1][attr]) + except KeyError,e: + return "" + + +def get_string_value_RXP(node): + """gets all child text nodes of node 'node' and returns them as a unicode string""" + if not node==None: + return ''.join([ n for n in node[2] if type(n).__name__ == 'str']) + else: + return "" + + +def encode_for_xml(s): + "Encode special chars in string so that it would be XML-compliant." + s = string.replace(s, '&', '&') + s = string.replace(s, '<', '<') + s = string.replace(s, '>', '>') + return s + + +def print_errors(list): + """ creates a unique string with the strings in list, using '\n' as a separator """ + text="" + + for l in list: + text = '%s\n%s'% (text,l) + + return text + + + +def wash(xmltext, parser=2): + """ + Check the structure of the xmltext. Returns a record structure and a list of errors. + parser = 1 - 4_suite + parser = 2 - pyRXP + parser = 0 - minidom + """ + + errors=[] + i,e1 = tagclose('datafield',xmltext) + j,e2 = tagclose('controlfield',xmltext) + k,e3 = tagclose('subfield',xmltext) + w,e4 = tagclose('record',xmltext) + errors.extend(e1) + errors.extend(e2) + errors.extend(e3) + errors.extend(e4) + + if i and j and k and w and parser!=-3: + if parser==1: + (rec,ee) = create_record_4suite(xmltext,0,0) + elif parser==2: + (rec,ee) = create_record_RXP(xmltext,0,0) + else: + (rec,ee) = create_record_minidom(xmltext,0,0) + else: + return (None,errors) - \ No newline at end of file + + + keys = rec.keys() + + for tag in keys: + upper_bound = '999' + n = len(tag) + + if n>3: + i=n-3 + while i>0: + upper_bound = '%s%s' % ('0',upper_bound) + i = i-1 + + if tag == '!': # missing tag + errors.append((1, '(field number(s): ' + ([f[4] for f in rec[tag]]).__str__()+')')) + v=rec[tag] + rec.__delitem__(tag) + rec['000'] = v + tag = '000' + elif not ("001" <= tag <=upper_bound): + errors.append(2) + v = rec[tag] + rec.__delitem__(tag) + rec['000'] = v + tag = '000' + + fields =[] + for field in rec[tag]: + if field[0]==[] and field[3]=='': ## datafield without any subfield + errors.append((8,'(field number: '+field[4].__str__()+')')) + + subfields=[] + for subfield in field[0]: + if subfield[0]=='!': + errors.append((3,'(field number: '+field[4].__str__()+')')) + newsub = ('',subfield[1]) + else: + newsub = subfield + subfields.append(newsub) + + if field[1]=='!': + errors.append((4,'(field number: '+field[4].__str__()+')')) + ind1 = "" + else: + ind1 = field[1] + + if field[2]=='!': + errors.append((5,'(field number: '+field[4].__str__()+')')) + ind2 = "" + else: + ind2=field[2] + + newf = (subfields,ind1,ind2,field[3],field[4]) + fields.append(newf) + + rec[tag]=fields + + return (rec,errors) + + +def tagclose(tagname,xmltext): + """ checks if an XML document does not hae any missing tag with name tagname + """ + import re + errors=[] + pat_open = '<'+tagname+'.*?>' + pat_close = '' + p_open = re.compile(pat_open,re.DOTALL) # DOTALL - to ignore whitespaces + p_close = re.compile(pat_close,re.DOTALL) + list1 = p_open.findall(xmltext) + list2 = p_close.findall(xmltext) + + if len(list1)!=len(list2): + errors.append((99,'(Tagname : ' + tagname + ')')) + return (0,errors) + else: + return (1,errors) + + + +def testImports(c): + """ Test if the import statements did not failed""" + errors=[] + global err1,err2 + + if c==-1: + i = 0 + errors.append((6,err2)) + elif c == -3: + i=0 + errors.append((6,err1)) + else: + i=1 + return (i,errors) + + +def warning(code): + """ It returns a warning message of code 'code'. + If code = (cd, str) it returns the warning message of code 'cd' + and appends str at the end""" + + ws = cfg_bibrecord_warning_msgs + s='' + + if type(code).__name__ == 'str': + return code + + if type(code).__name__ == 'tuple': + if type(code[1]).__name__ == 'str': + s = code[1] + c = code[0] + else: + c = code + if ws.has_key(c): + return ws[c]+s + else: + return "" + +def warnings(l): + """it applies the function warning to every element in l""" + list = [] + for w in l: + list.append(warning(w)) + return list + diff --git a/modules/bibedit/lib/bibrecord.py.wml b/modules/bibedit/lib/bibrecord.py.wml index 7b7e0e3a3..cf966f357 100644 --- a/modules/bibedit/lib/bibrecord.py.wml +++ b/modules/bibedit/lib/bibrecord.py.wml @@ -1,280 +1,966 @@ ## $Id$ -## + ## This file is part of the CERN Document Server Software (CDSware). ## Copyright (C) 2002 CERN. ## ## The CDSware is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## The CDSware is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with CDSware; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. -## read config variables: -#include "config.wml" -#include "configbis.wml" -#include "cdswmllib.wml" - -## start Python: -#! # -*- coding: utf-8 -*- ## $Id$ ## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES. + """ -BibRecord - xml data processing system for CDSware +BibRecord - XML MARC processing library for CDSware """ -## fill config variables: -pylibdir = "/python" - +### IMPORT INTERESTING MODULES AND XML PARSERS + ## import interesting modules: try: import sys - from xml.dom import Node - from xml.sax.saxutils import escape + import re from zlib import decompress + import_error = 0 except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) - + import_error = 1 + imperr = e + +## test available parsers: try: - from Ft.Xml.Domlette import NonvalidatingReader - is4Suite = 1 -except ImportError,e : - try: - from xml.dom.minidom import parseString - is4Suite = 0 - except ImportError,e : - print "Error: %s" % e - import sys - sys.exit(1) + import sys + import string + err=[] +except ImportError, e: + parser = -3 + err1 = e try: - sys.path.append('%s' % pylibdir) - from cdsware.config import * - from cdsware.dbquery import run_sql + from bibrecord_config import * + verbose = cfg_bibrecord_default_verbose_level + correct = cfg_bibrecord_default_correct + parsers = cfg_bibrecord_parsers_available except ImportError, e: - print "Error: %s" % e - import sys - sys.exit(1) + parser = -2 + verbose = 0 + correct = 0 + parsers = [] -def create_record(xmltext): - """ - creates a record object and returns it - uses 4Suite domlette if installed else uses xml.dom.minidom - """ - global is4Suite - try: - if is4Suite: - rec = create_record_4suite(xmltext) +if parsers == []: + print 'No parser available' + sys.exit(2) +else: + j,i=1,1 + if 2 in parsers: + try: + import pyRXP + parser = 2 + ## function to show the pyRXP_parser warnings ## + def warnCB(s): + """ function used to treat the PyRXP parser warnings""" + global err + err.append((0,'Parse warning:\n'+s)) + + err2 = "" + except ImportError,e : + err2=e + i=0 + elif 1 in parsers: + try: + from Ft.Xml.Domlette import NonvalidatingReader + parser = 1 + except ImportError,e : + err2=e + j=0 + else: + try: + from xml.dom.minidom import parseString + parser = 0 + except ImportError,e : + err2=e + parser = -1 + + if not i: + if 1 in parsers: + try: + from Ft.Xml.Domlette import NonvalidatingReader + parser = 1 + except ImportError,e : + err2=e + j=0 + elif 0 in parsers: + try: + from xml.dom.minidom import parseString + parser = 0 + except ImportError,e : + err2=e + parser = -1 else: - rec = create_record_minidom(xmltext) - except: - return None - return rec + parser = -1 -def create_record_minidom(xmltext): - """ - creates a record object and returns it - uses xml.dom.minidom - """ - record = {} - record["fields"] = [] - dom = parseString(xmltext) - root = dom.childNodes[0] - for controlfield in get_childs_by_tag_name(root,"controlfield"): - record["fields"].append(create_field(controlfield.getAttribute("tag"),get_string_value(controlfield))) - for datafield in get_childs_by_tag_name(root,"datafield"): - record["fields"].append(create_field(datafield.getAttribute("tag"),"",datafield.getAttribute("ind1"),datafield.getAttribute("ind2"))) - myfield = record["fields"][len(record["fields"])-1] - for subfield in get_childs_by_tag_name(datafield,"subfield"): - field_add_subfield(myfield,subfield.getAttribute("code"),get_string_value(subfield)) - return record + if not j: + if 0 in parsers: + try: + from xml.dom.minidom import parseString + parser = 0 + except ImportError,e : + err2=e + parser = -1 + else: + parser = -1 + +### INTERFACE / VISIBLE FUNCTIONS + +def create_records(xmltext,verbose=verbose,correct=correct): + """ + creates a list of records + """ + global import_error + err=[] + if import_error == 1: + err.append((6,imperr)) + else: + if sys.version >= '2.3': + pat = r".*?" + p = re.compile(pat,re.DOTALL) # DOTALL - to ignore whitespaces + list = p.findall(xmltext) + else: + l = xmltext.split('') + n=len(l) + ind = (l[n-1]).rfind('') + aux = l[n-1][:ind+9] + l[n-1] = aux + list=[] + for s in l: + if s != '': + i = -1 + while (s[i].isspace()): + i=i-1 + if i == -1:#in case there are no spaces at the end + i=len(s)-1 + if s[:i+1].endswith(''): + list.append(''+s) + listofrec = map((lambda x:create_record(x,verbose,correct)),list) + return listofrec + return [] -def create_record_4suite(xmltext): +# Record :: {tag : [Field]} +# Field :: (Subfields,ind1,ind2,value) +# Subfields :: [(code,value)] + +def create_record(xmltext,verbose = verbose, correct=correct): """ creates a record object and returns it - uses 4Suite domlette + uses pyRXP if installed else uses 4Suite domlette or xml.dom.minidom """ - record = {} - record["fields"] = [] - dom = NonvalidatingReader.parseString(xmltext,"urn:dummy") - root = dom.childNodes[0] - for controlfield in get_childs_by_tag_name(root,"controlfield"): - record["fields"].append(create_field(controlfield.getAttributeNS(None,"tag"),get_string_value(controlfield))) - for datafield in get_childs_by_tag_name(root,"datafield"): - record["fields"].append(create_field(datafield.getAttributeNS(None,"tag"),"",datafield.getAttributeNS(None,"ind1"),datafield.getAttributeNS(None,"ind2"))) - myfield = record["fields"][len(record["fields"])-1] - for subfield in get_childs_by_tag_name(datafield,"subfield"): - field_add_subfield(myfield,subfield.getAttributeNS(None,"code"),get_string_value(subfield)) - return record + global parser -def record_order_fields(rec,fun="order_by_tag"): - """orders field inside record 'rec' according to a function""" - rec["fields"].sort(eval(fun)) - return + (i,errors) = testImports(parser) -def record_order_subfields(rec,fun="order_by_code"): - """orders subfield inside record 'rec' according to a function""" - for field in rec['fields']: - field['subfields'].sort(eval(fun)) - return + if i==0: + return (None,0,errors) + + try: + if parser==2: + ## the following is because of DTD validation + t = """ + + \n""" % cfg_marc21_dtd + t = "%s%s" % (t,xmltext) + t = "%s" % t + xmltext = t + (rec,er) = create_record_RXP(xmltext,verbose,correct) + elif parser: + (rec,er) = create_record_4suite(xmltext,verbose,correct) + else: + (rec,er) = create_record_minidom(xmltext,verbose,correct) + + errs = warnings(er) + except: + errs = warnings(concat(err)) + return (None,0,errs) + if errs == []: + return (rec,1,errs) + else: + return (rec,0,errs) + + + def record_get_field_instances(rec): """returns the list of fields stored in record 'rec'""" - return rec["fields"] + return rec.items() + def record_has_field(rec,tag): """checks whether record 'rec' contains tag 'tag'""" - for field in rec["fields"]: - if field["tag"] == tag: - return 1 - return 0 + return rec.has_key(tag) def record_add_field(rec,tag,value,ind1="",ind2=""): """ adds new field defined by the tag|value|ind1|ind2 parameters to record 'rec' returns the new field """ - newfield = create_field(tag,value,ind1,ind2) - rec["fields"].append(newfield) - return newfield + + val=rec.values() + if val != []: + ord = max([f[4] for x in val for f in x]) + else: + ord = 1 + newfield = create_field(value,ind1,ind2,[],ord) + + if rec.has_key(tag): + rec[tag].append(newfield) + else: + rec[tag] = [newfield] + + return newfield def record_delete_field(rec,tag,ind1="",ind2=""): """ delete all fields defined with marc tag 'tag' and indicators 'ind1' and 'ind2' from record 'rec' """ newlist = [] - for field in rec["fields"]: - if not (field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2): - newlist.append(field) - rec["fields"] = newlist + if rec.has_key(tag): + for field in rec[tag]: + if not (field[1]==ind1 and field[2]==ind2): + newlist.append(field) + rec[tag] = newlist def record_get_field_value(rec,tag,ind1="",ind2="",code=""): """ retrieves the value of the first field containing tag 'tag' and indicators 'ind1' and 'ind2' inside record 'rec'. Returns the found value as a string. If no matching field is found returns the empty string. + if the tag has a '%', it will retrieve the value of first field containg tag, which first characters are those before '%' in tag. The ind1, ind2 and code parameters will be ignored """ - for field in rec["fields"]: - if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2: - if field["value"] != "": - return field["value"] - else: - for subfield in field["subfields"]: - if subfield["code"]==code: - return subfield["value"] + + s = tag.split('%') + if len(s) > 1: + t = s[0] + keys=rec.keys() + tags=[k for k in keys if k.startswith(t)] + for tag in tags: + fields = rec[tag] + for field in fields: + if field[3] != "": + return field[3] + else: + for subfield in field[0]: + return subfield[1] + else: + if rec.has_key(tag): + fields = rec[tag] + for field in fields: + if field[1]==ind1 and field[2]==ind2: + if field[3] != "": + return field[3] + else: + for subfield in field[0]: + if subfield[0]==code: + return subfield[1] + return "" def record_get_field_values(rec,tag,ind1="",ind2="",code=""): """ retrieves the values of all the fields containing tag 'tag' and indicators 'ind1' and 'ind2' inside record 'rec'. Returns the found values as a list. If no matching field is found returns an empty list. + if the tag has a '%', it will retrieve the value of all fields containg tag, which first characters are those before '%' in tag. The ind1, ind2 and code parameters will be ignored """ tmp = [] - for field in rec["fields"]: - if field["tag"]==tag and field["ind1"]==ind1 and field["ind2"]==ind2: - if field["value"] != "": - tmp.append(field["value"]) - else: - for subfield in field["subfields"]: - if subfield["code"]==code: - tmp.append(subfield["value"]) + + s = tag.split('%') + if len(s) > 1: + t = s[0] + keys=rec.keys() + tags=[k for k in keys if k.startswith(t)] + for tag in tags: + fields = rec[tag] + for field in fields: + if field[3] != "": + tmp.append(field[3]) + else: + for subfield in field[0]: + tmp.append(subfield[1]) + else: + if rec.has_key(tag): + fields = rec[tag] + for field in fields: + if field[1]==ind1 and field[2]==ind2: + if field[3] != "": + tmp.append(field[3]) + else: + for subfield in field[0]: + if subfield[0]==code: + tmp.append(subfield[1]) + return tmp +def print_rec(rec,format=1): + """prints a record + format = 1 -- XML + format = 2 -- HTML (not implemented) + """ + + if format==1: + text = record_xml_output(rec) + else: + return '' + + return text + +def print_recs(listofrec,format=1): + """prints a list of records + format = 1 -- XML + format = 2 -- HTML (not implemented) + if 'listofrec' is not a list it returns empty string + """ + text = "" + + if type(listofrec).__name__ !='list': + return "" + else: + for rec in listofrec: + text = "%s\n%s" % (text,print_rec(rec,format)) + return text + def record_xml_output(rec): """generates the XML for record 'rec' and returns it as a string""" xmltext = "\n" - record_order_fields(rec) - for field in rec["fields"]: - xmltext = "%s%s" % (xmltext,field_xml_output(field)) + #add the tag 'tag' to each field in rec[tag] + fields=[] + for tag in rec.keys(): + for field in rec[tag]: + fields.append((tag,field)) + + record_order_fields(fields) + + for field in fields: + xmltext = "%s%s" % (xmltext,field_xml_output(field[1],field[0]))#field[0]=tag xmltext = "%s" % xmltext return xmltext + +def records_xml_output(listofrec): + """generates the XML for the list of records 'listofrec' and returns it as a string""" + xmltext = """ + + \n""" % cfg_marc21_dtd + + for rec in listofrec: + xmltext = "%s%s" % (xmltext, record_xml_output(rec)) + xmltext = "%s" % xmltext + return xmltext + +def field_get_subfield_instances(field): + """returns the list of subfields associated with field 'field'""" + return field[0] + +def field_add_subfield(field,code,value): + """adds a subfield to field 'field'""" + field[0].append(create_subfield(code,value)) + + +### IMPLEMENTATION / INVISIBLE FUNCTIONS + +def create_record_RXP(xmltext, verbose=verbose, correct=correct): + """ + creates a record object and returns it + uses the RXP parser + + If verbose>3 then the parser will be strict and will stop in case of well-formedness errors + or DTD errors + If verbose=0, the parser will not give warnings + If 0 We will try to correct errors such as missing attributtes + correct = 0 -> there will not be any attempt to correct errors + + """ + + record = {} + global err + + ord = 1 # this is needed because of the record_xml_output function, where we need to know + # the order of the fields + + + TAG, ATTRS,CHILD_LIST = range(3) + + if verbose > 3: + p = pyRXP.Parser(ErrorOnValidityErrors=1, + ProcessDTD=1, + ErrorOnUnquotedAttributeValues=1, + warnCB = warnCB, + srcName='string input') + else: + p = pyRXP.Parser(ErrorOnValidityErrors=0, + ProcessDTD=1, + ErrorOnUnquotedAttributeValues=0, + warnCB = warnCB, + srcName='string input') + + + if correct: + (rec,e) = wash(xmltext) + err.extend(e) + return (rec,e) + + + root1=p(xmltext) #root = (tagname, attr_dict, child_list, reserved) + + if root1[0]=='collection': + recs = [t for t in root1[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="record"] + if recs !=[]: + root = recs[0] + else: + root = None + else: + root=root1 + + + + # get childs of 'controlfield' + childs_controlfield = [] + if not root[2]==None: + childs_controlfield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="controlfield"] + + # get childs of 'datafield' + childs_datafield = [] + if not root[CHILD_LIST]==None: + childs_datafield =[t for t in root[CHILD_LIST] if type(t).__name__=='tuple' and t[TAG]=="datafield"] + + for controlfield in childs_controlfield: + s=controlfield[ATTRS]["tag"] + value='' + if not controlfield==None: + value=''.join([ n for n in controlfield[CHILD_LIST] if type(n).__name__ == 'str']) + + name = type(value).__name__ + + if name in ["int","long"] : + st = str(value) + elif name == 'str': + st = value + else: + if verbose: + err.append((7,'Type found: ' + name)) + st = "" # the type of value is not correct. (user insert something like a list...) + + + field = ([],"","",st,ord) #field = (subfields, ind1, ind2,value,ord) + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + + ord = ord+1 + + for datafield in childs_datafield: + + #create list of subfields + subfields = [] + + childs_subfield = [] + if not datafield[CHILD_LIST]==None: + childs_subfield =[t for t in datafield[CHILD_LIST] if type(t).__name__=='tuple' and t[0]=="subfield"] + + for subfield in childs_subfield: + value='' + if not subfield==None: + value=''.join([ n for n in subfield[CHILD_LIST] if type(n).__name__ == 'str']) + #get_string_value(subfield) + if subfield[ATTRS].has_key('code'): + subfields.append((subfield[ATTRS]["code"],value)) + else: + subfields.append(('!',value)) + + #create field + + if datafield[ATTRS].has_key('tag'): + s = datafield[ATTRS]["tag"] + else: + s = '!' + + if datafield[ATTRS].has_key('ind1'): + ind1 = datafield[ATTRS]["ind1"] + else: + ind1 = '!' + + if datafield[ATTRS].has_key('ind2'): + ind2 = datafield[ATTRS]["ind2"] + else: + ind2 = '!' + + field = (subfields,ind1,ind2,"",ord) + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + + ord = ord+1 + + return (record,err) + + -def create_field(tag,value,ind1="",ind2=""): +def create_record_minidom(xmltext, verbose=verbose, correct=correct): + """ + creates a record object and returns it + uses xml.dom.minidom + """ + + record = {} + ord=1 + global err + + if correct: + xmlt = xmltext + (rec,e) = wash(xmlt,0) + err.append(e) + return (rec,err) + + dom = parseString(xmltext) + root = dom.childNodes[0] + + for controlfield in get_childs_by_tag_name(root,"controlfield"): + s = controlfield.getAttribute("tag") + + text_nodes = controlfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + + name = type(v).__name__ + if (name in ["int","long"]) : + field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value) + elif name == 'str': + field = ([],"","",v,ord) + else: + if verbose: + err.append((7,'Type found: ' + name)) + + field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...) + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + ord=ord+1 + + for datafield in get_childs_by_tag_name(root,"datafield"): + subfields = [] + + for subfield in get_childs_by_tag_name(datafield,"subfield"): + text_nodes = subfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + code = subfield.getAttributeNS(None,'code') + if code != '': + subfields.append((code,v)) + else: + subfields.append(('!',v)) + + s = datafield.getAttribute("tag") + if s == '': + s = '!' + + ind1 = datafield.getAttribute("ind1") + + ind2 = datafield.getAttribute("ind2") + + + if record.has_key(s): + record[s].append((subfields,ind1,ind2,"",ord)) + else: + record[s]=[(subfields,ind1,ind2,"",ord)] + ord=ord+1 + + return (record,err) + + +def create_record_4suite(xmltext,verbose=verbose,correct=correct): + """ + creates a record object and returns it + uses 4Suite domlette + """ + record = {} + global err + + if correct: + xmlt = xmltext + (rec,e) = wash(xmlt,1) + err.append(e) + return (rec,e) + + dom = NonvalidatingReader.parseString(xmltext,"urn:dummy") + + root = dom.childNodes[0] + + ord=1 + + for controlfield in get_childs_by_tag_name(root,"controlfield"): + s = controlfield.getAttributeNS(None,"tag") + + text_nodes = controlfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + + name = type(v).__name__ + if (name in ["int","long"]) : + field = ([],"","",str(v),ord) # field = (subfields, ind1, ind2,value) + elif name == 'str': + field = ([],"","",v,ord) + else: + if verbose: + err.append((7,'Type found: ' + name)) + + field = ([],"","","",ord)# the type of value is not correct. (user insert something like a list...) + + + if record.has_key(s): + record[s].append(field) + else: + record[s]=[field] + ord=ord+1 + + + for datafield in get_childs_by_tag_name(root,"datafield"): + subfields = [] + + for subfield in get_childs_by_tag_name(datafield,"subfield"): + text_nodes = subfield.childNodes + v = u''.join([ n.data for n in text_nodes ]) + + code = subfield.getAttributeNS(None,'code') + if code != '': + subfields.append((code,v)) + else: + subfields.append(('!',v)) + + s = datafield.getAttributeNS(None,"tag") + if s == '': + s = '!' + + ind1 = datafield.getAttributeNS(None,"ind1") + + ind2 = datafield.getAttributeNS(None,"ind2") + + + if record.has_key(s): + record[s].append((subfields,ind1,ind2,"",ord)) + else: + record[s]=[(subfields,ind1,ind2,"",ord)] + ord=ord+1 + + return (record,err) + + + + +def record_order_fields(rec,fun="order_by_ord"): + """orders field inside record 'rec' according to a function""" + rec.sort(eval(fun)) + return + +def record_order_subfields(rec,fun="order_by_code"): + """orders subfield inside record 'rec' according to a function""" + for tag in rec: + for field in rec[tag]: + field[0].sort(eval(fun)) + return + + + +def concat(list): + """concats a list of lists""" + newl = [] + for l in list: + newl.extend(l) + return newl + + + + +def create_field(value,ind1="",ind2="",subfields=[],ord=-1): """ creates a field object and returns it""" - field = {} - field["subfields"] = [] - field["tag"] = tag - field["ind1"] = ind1 - field["ind2"] = ind2 - if type(value).__name__ in ["int","long"] : - field["value"] = str(value) + + name = type(value).__name__ + if name in ["int","long"] : + s = str(value) + elif name == 'str': + s = value else: - field["value"] = value - return field + err.append((7,'Type found: ' + name)) + s="" + -def field_get_subfield_instances(field): - """returns the list of subfields associated with field 'field'""" - return field["subfields"] + field = (subfields,ind1,ind2,s,ord) + return field + def field_add_subfield(field,code,value): """adds a subfield to field 'field'""" - field["subfields"].append(create_subfield(code,value)) + field[0].append(create_subfield(code,value)) -def field_xml_output(field): +def field_xml_output(field,tag): """generates the XML for field 'field' and returns it as a string""" xmltext = "" - if field["value"] != "": - xmltext = "%s %s\n" % (xmltext,field["tag"],escape(field["value"])) + if field[3] != "": + xmltext = "%s %s\n" % (xmltext,tag,encode_for_xml(field[3])) else: - xmltext = "%s \n" % (xmltext,field["tag"],field["ind1"],field["ind2"]) - for subfield in field["subfields"]: + xmltext = "%s \n" % (xmltext,tag,field[1],field[2]) + for subfield in field[0]: xmltext = "%s%s" % (xmltext,subfield_xml_output(subfield)) xmltext = "%s \n" % xmltext return xmltext def create_subfield(code,value): """ creates a subfield object and returns it""" - subfield = {} - subfield["code"] = code if type(value).__name__ in ["int","long"]: - subfield["value"] = str(value) + s = str(value) else: - subfield["value"] = value + s = value + subfield = (code, s) + return subfield def subfield_xml_output(subfield): """generates the XML for a subfield object and return it as a string""" - xmltext = " %s\n" % (subfield["code"],escape(subfield["value"])) + xmltext = " %s\n" % (subfield[0],encode_for_xml(subfield[1])) return xmltext -def order_by_tag(field1, field2): - """function used to order the fields according to their tag value""" - return cmp(field1["tag"], field2["tag"]) +def order_by_ord(field1, field2): + """function used to order the fields according to their ord value""" + return cmp(field1[1][4], field2[1][4]) def order_by_code(subfield1,subfield2): """function used to order the subfields according to their code value""" - return cmp(subfield1['code'],subfield2['code']) + return cmp(subfield1[0],subfield2[0]) def get_childs_by_tag_name(node, local): """retrieves all childs from node 'node' with name 'local' and returns them as a list""" - res=[] - for child in node.childNodes: - if child.localName == local: - res.append(child) + cNodes = list(node.childNodes) + res = [child for child in cNodes if child.nodeName==local] return res def get_string_value(node): """gets all child text nodes of node 'node' and returns them as a unicode string""" text_nodes = node.childNodes return u''.join([ n.data for n in text_nodes ]) -def db_get_xml_record(recid,format='xm'): - """extracts record #newid xml representation from the db and returns it as a string""" - res = run_sql("SELECT value FROM bibfmt WHERE format=%s and id_bibrec=%s",(format,recid,)) - if len(res) > 0: - return decompress(res[0][0]) - return "" + + +def get_childs_by_tag_name_RXP(listofchilds,tag): + """retrieves all childs from 'listofchilds' with tag name 'tag' and returns them as a list. + listofchilds is a list returned by the RXP parser + """ + l=[] + if not listofchilds==None: + l =[t for t in listofchilds if type(t).__name__=='tuple' and t[0]==tag] + + return l + + +def getAttribute_RXP(root, attr): + """ returns the attributte 'attr' from root 'root' + root is a node returned by RXP parser + """ + try: + return u''.join(root[1][attr]) + except KeyError,e: + return "" + + +def get_string_value_RXP(node): + """gets all child text nodes of node 'node' and returns them as a unicode string""" + if not node==None: + return ''.join([ n for n in node[2] if type(n).__name__ == 'str']) + else: + return "" + + +def encode_for_xml(s): + "Encode special chars in string so that it would be XML-compliant." + s = string.replace(s, '&', '&') + s = string.replace(s, '<', '<') + s = string.replace(s, '>', '>') + return s + + +def print_errors(list): + """ creates a unique string with the strings in list, using '\n' as a separator """ + text="" + + for l in list: + text = '%s\n%s'% (text,l) + + return text + + + +def wash(xmltext, parser=2): + """ + Check the structure of the xmltext. Returns a record structure and a list of errors. + parser = 1 - 4_suite + parser = 2 - pyRXP + parser = 0 - minidom + """ + + errors=[] + i,e1 = tagclose('datafield',xmltext) + j,e2 = tagclose('controlfield',xmltext) + k,e3 = tagclose('subfield',xmltext) + w,e4 = tagclose('record',xmltext) + errors.extend(e1) + errors.extend(e2) + errors.extend(e3) + errors.extend(e4) + + if i and j and k and w and parser!=-3: + if parser==1: + (rec,ee) = create_record_4suite(xmltext,0,0) + elif parser==2: + (rec,ee) = create_record_RXP(xmltext,0,0) + else: + (rec,ee) = create_record_minidom(xmltext,0,0) + else: + return (None,errors) - \ No newline at end of file + + + keys = rec.keys() + + for tag in keys: + upper_bound = '999' + n = len(tag) + + if n>3: + i=n-3 + while i>0: + upper_bound = '%s%s' % ('0',upper_bound) + i = i-1 + + if tag == '!': # missing tag + errors.append((1, '(field number(s): ' + ([f[4] for f in rec[tag]]).__str__()+')')) + v=rec[tag] + rec.__delitem__(tag) + rec['000'] = v + tag = '000' + elif not ("001" <= tag <=upper_bound): + errors.append(2) + v = rec[tag] + rec.__delitem__(tag) + rec['000'] = v + tag = '000' + + fields =[] + for field in rec[tag]: + if field[0]==[] and field[3]=='': ## datafield without any subfield + errors.append((8,'(field number: '+field[4].__str__()+')')) + + subfields=[] + for subfield in field[0]: + if subfield[0]=='!': + errors.append((3,'(field number: '+field[4].__str__()+')')) + newsub = ('',subfield[1]) + else: + newsub = subfield + subfields.append(newsub) + + if field[1]=='!': + errors.append((4,'(field number: '+field[4].__str__()+')')) + ind1 = "" + else: + ind1 = field[1] + + if field[2]=='!': + errors.append((5,'(field number: '+field[4].__str__()+')')) + ind2 = "" + else: + ind2=field[2] + + newf = (subfields,ind1,ind2,field[3],field[4]) + fields.append(newf) + + rec[tag]=fields + + return (rec,errors) + + +def tagclose(tagname,xmltext): + """ checks if an XML document does not hae any missing tag with name tagname + """ + import re + errors=[] + pat_open = '<'+tagname+'.*?>' + pat_close = '' + p_open = re.compile(pat_open,re.DOTALL) # DOTALL - to ignore whitespaces + p_close = re.compile(pat_close,re.DOTALL) + list1 = p_open.findall(xmltext) + list2 = p_close.findall(xmltext) + + if len(list1)!=len(list2): + errors.append((99,'(Tagname : ' + tagname + ')')) + return (0,errors) + else: + return (1,errors) + + + +def testImports(c): + """ Test if the import statements did not failed""" + errors=[] + global err1,err2 + + if c==-1: + i = 0 + errors.append((6,err2)) + elif c == -3: + i=0 + errors.append((6,err1)) + else: + i=1 + return (i,errors) + + +def warning(code): + """ It returns a warning message of code 'code'. + If code = (cd, str) it returns the warning message of code 'cd' + and appends str at the end""" + + ws = cfg_bibrecord_warning_msgs + s='' + + if type(code).__name__ == 'str': + return code + + if type(code).__name__ == 'tuple': + if type(code[1]).__name__ == 'str': + s = code[1] + c = code[0] + else: + c = code + if ws.has_key(c): + return ws[c]+s + else: + return "" + +def warnings(l): + """it applies the function warning to every element in l""" + list = [] + for w in l: + list.append(warning(w)) + return list + diff --git a/modules/bibedit/lib/bibrecord_config.py b/modules/bibedit/lib/bibrecord_config.py new file mode 100644 index 000000000..ff4f2ffe0 --- /dev/null +++ b/modules/bibedit/lib/bibrecord_config.py @@ -0,0 +1,46 @@ +## $Id$ + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +#include "configbis.wml" + +### CONFIGURATION OPTIONS FOR BIBRECORD LIBRARY + +#0: Parse Warning +"""Configuration for module bibrecord""" + +cfg_marc21_dtd = "/bibedit/MARC21slim.dtd" + +cfg_bibrecord_warning_msgs = { + 0: '' , + 1: 'WARNING: tag missing for field(s)\nValue stored with tag \'000\'', + 2: 'WARNING: bad range for tags (tag must be in range 001-999)\nValue stored with tag \'000\'', + 3: 'WARNING: Missing atributte \'code\' for subfield\nValue stored with code \'\'', + 4: 'WARNING: Missing attributte \'ind1\'\n Value stored with ind1 = \'\'', + 5: 'WARNING: Missing attributte \'ind2\'\n Value stored with ind2 = \'\'', + 6: 'Import Error\n', + 7: 'WARNING: value expected of type string.', + 8: 'WARNING: empty datafield', + 98:'WARNING: problems importing cdsware', + 99: 'Document not well formed' + } # Dictionary of warning messages + +cfg_bibrecord_default_verbose_level=0 #Verbose level to be used in the creation of records +cfg_bibrecord_default_correct=0 #Correct (0/1) to be used in the creation of records +cfg_bibrecord_parsers_available = [0,1,2] #parsers available + # 0 - minidom; 1-is4suite; 2-pyRXP diff --git a/modules/bibedit/lib/bibrecord_config.py.wml b/modules/bibedit/lib/bibrecord_config.py.wml new file mode 100644 index 000000000..ff4f2ffe0 --- /dev/null +++ b/modules/bibedit/lib/bibrecord_config.py.wml @@ -0,0 +1,46 @@ +## $Id$ + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +#include "configbis.wml" + +### CONFIGURATION OPTIONS FOR BIBRECORD LIBRARY + +#0: Parse Warning +"""Configuration for module bibrecord""" + +cfg_marc21_dtd = "/bibedit/MARC21slim.dtd" + +cfg_bibrecord_warning_msgs = { + 0: '' , + 1: 'WARNING: tag missing for field(s)\nValue stored with tag \'000\'', + 2: 'WARNING: bad range for tags (tag must be in range 001-999)\nValue stored with tag \'000\'', + 3: 'WARNING: Missing atributte \'code\' for subfield\nValue stored with code \'\'', + 4: 'WARNING: Missing attributte \'ind1\'\n Value stored with ind1 = \'\'', + 5: 'WARNING: Missing attributte \'ind2\'\n Value stored with ind2 = \'\'', + 6: 'Import Error\n', + 7: 'WARNING: value expected of type string.', + 8: 'WARNING: empty datafield', + 98:'WARNING: problems importing cdsware', + 99: 'Document not well formed' + } # Dictionary of warning messages + +cfg_bibrecord_default_verbose_level=0 #Verbose level to be used in the creation of records +cfg_bibrecord_default_correct=0 #Correct (0/1) to be used in the creation of records +cfg_bibrecord_parsers_available = [0,1,2] #parsers available + # 0 - minidom; 1-is4suite; 2-pyRXP diff --git a/modules/bibedit/lib/bibrecord_tests.py b/modules/bibedit/lib/bibrecord_tests.py new file mode 100644 index 000000000..f9c8a706c --- /dev/null +++ b/modules/bibedit/lib/bibrecord_tests.py @@ -0,0 +1,201 @@ +## $Id$ + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +# -*- coding: utf-8 -*- + +from config import tmpdir +import bibrecord +import unittest +from string import expandtabs, replace + +class SanityTest(unittest.TestCase): + +### check for sanity -- xml -> create_record -> xml + def test_for_sanity(self): + """ bibrecord - checking for sanity """ + + f=open(tmpdir + '/demobibdata.xml','r') + xmltext = f.read() + f.close() + rs = bibrecord.create_records(xmltext) + recs = map((lambda x:x[0]),rs) + xmlT = bibrecord.records_xml_output(recs) + x = xmlT.replace('\n','') + y = xmltext.replace('\n','') + xx=expandtabs(x) + yy=expandtabs(y) + xxx = xx.replace(' ','') + yyy = yy.replace(' ','') + #self.assertEqual(xxx,yyy) + +### testing for success + +class SuccessTest(unittest.TestCase): + """ bibrecord - testing for success """ + def setUp(self): + f=open(tmpdir + '/demobibdata.xml','r') + xmltext = f.read() + f.close() + self.recs = map((lambda x:x[0]),bibrecord.create_records(xmltext)) + + def test_records_created(self): + """ bibrecord - number of records created """ + ## check if it creates every records (the file demobibdata.xml has 75 records) + self.assertEqual(75,len(self.recs)) + + def test_tags_created(self): + """ bibrecord - tags created """ + ## check if the tags are correct + tags= ['020', '037', '041', '080', '088', '100', '245', '246', '250', '260', '270', '300', '340', '490', '500', '502', '520', '590', '595', '650', '653', '690', '700', '710', '856','909','980','999'] + t=[] + for rec in self.recs: + t.extend(rec.keys()) + t.sort() + #eliminate the elements repeated + tt = [] + for x in t: + if not x in tt: + tt.append(x) + self.assertEqual(tags,tt) + + def test_fields_created(self): + """bibrecord - fields created""" + ## check if the number of fields for each record is correct + + fields=[13,13, 8, 11, 10,12, 10, 14, 10, 17, 13, 15, 10, 9, 14, 10, 11, 11, 11, 9, 10, 10, 10, 8, 8, 8, 9, 9, 9, 10, 8, 8, 8,8, 14, 13, 14, 14, 15, 12,12, 12,14, 13, 11, 15, 15, 14, 14, 13, 15, 14, 14, 14, 15, 14, 15, 14, 14, 15, 14, 13, 13, 14, 11, 13, 11, 14, 8, 10, 13, 12, 11, 12,6] + + cr=[] + ret=[] + for rec in self.recs: + cr.append(len(rec.values())) + ret.append(rec) + self.assertEqual(fields,cr) + +class BadInputTreatmentTest(unittest.TestCase): + """ bibrecord - testing for bad input treatment """ + + +### check bad input treatment ### + def test_wrong_attribute(self): + """bibrecord - bad input : Has \'cde\' instead \'code\' in a subfield attribute""" + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error1 = """ + + 33 + + eng + + + Doe, John + + + On the foo and bar + + + """ + (rec,st,e) = bibrecord.create_record(xml_error1,1,1) + ee='' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[3])>0: + ee = i + self.assertEqual(bibrecord.warning((3,'(field number: 4)')),ee) + + def test_missing_attribute(self): + """ bibrecord - bad input : Missing attribute \"tag\" """ + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error2 = """ + + 33 + + eng + + + Doe, John + + + On the foo and bar + + + """ + (rec,st,e) = bibrecord.create_record(xml_error2,1,1) + ee='' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[1])>0: + ee = i + self.assertEqual(bibrecord.warning((1,'(field number(s): [2])')),ee) + + def test_empty_datafield(self): + """ bibrecord - bad input : Datafield without any subfield """ + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error3 = """ + + 33 + + + + Doe, John + + + On the foo and bar + + + """ + (rec,st,e) = bibrecord.create_record(xml_error3,1,1) + ee='' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[8])>0: + ee = i + self.assertEqual(bibrecord.warning((8,'(field number: 2)')),ee) + + + def test_missing_tag(self): + """bibrecord - bad input : Missing end \"tag\"""" + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error4 = """ + + 33 + + eng + + + Doe, John + + + On the foo and bar + + """ + (rec,st,e) = bibrecord.create_record(xml_error4,1,1) + ee = '' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[99])>0: + ee = i + self.assertEqual(bibrecord.warning((99,'(Tagname : datafield)')),ee) + +def create_test_suite(): + """Return test suite for the bibrecord module""" + return unittest.TestSuite((unittest.makeSuite(SanityTest,'test'), + unittest.makeSuite(SuccessTest,'test'), + unittest.makeSuite(BadInputTreatmentTest,'test'))) +if __name__ == '__main__': + unittest.TextTestRunner(verbosity=2).run(create_test_suite()) + diff --git a/modules/bibedit/lib/bibrecord_tests.py.wml b/modules/bibedit/lib/bibrecord_tests.py.wml new file mode 100644 index 000000000..f9c8a706c --- /dev/null +++ b/modules/bibedit/lib/bibrecord_tests.py.wml @@ -0,0 +1,201 @@ +## $Id$ + +## This file is part of the CERN Document Server Software (CDSware). +## Copyright (C) 2002 CERN. +## +## The CDSware is free software; you can redistribute it and/or +## modify it under the terms of the GNU General Public License as +## published by the Free Software Foundation; either version 2 of the +## License, or (at your option) any later version. +## +## The CDSware is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with CDSware; if not, write to the Free Software Foundation, Inc., +## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +# -*- coding: utf-8 -*- + +from config import tmpdir +import bibrecord +import unittest +from string import expandtabs, replace + +class SanityTest(unittest.TestCase): + +### check for sanity -- xml -> create_record -> xml + def test_for_sanity(self): + """ bibrecord - checking for sanity """ + + f=open(tmpdir + '/demobibdata.xml','r') + xmltext = f.read() + f.close() + rs = bibrecord.create_records(xmltext) + recs = map((lambda x:x[0]),rs) + xmlT = bibrecord.records_xml_output(recs) + x = xmlT.replace('\n','') + y = xmltext.replace('\n','') + xx=expandtabs(x) + yy=expandtabs(y) + xxx = xx.replace(' ','') + yyy = yy.replace(' ','') + #self.assertEqual(xxx,yyy) + +### testing for success + +class SuccessTest(unittest.TestCase): + """ bibrecord - testing for success """ + def setUp(self): + f=open(tmpdir + '/demobibdata.xml','r') + xmltext = f.read() + f.close() + self.recs = map((lambda x:x[0]),bibrecord.create_records(xmltext)) + + def test_records_created(self): + """ bibrecord - number of records created """ + ## check if it creates every records (the file demobibdata.xml has 75 records) + self.assertEqual(75,len(self.recs)) + + def test_tags_created(self): + """ bibrecord - tags created """ + ## check if the tags are correct + tags= ['020', '037', '041', '080', '088', '100', '245', '246', '250', '260', '270', '300', '340', '490', '500', '502', '520', '590', '595', '650', '653', '690', '700', '710', '856','909','980','999'] + t=[] + for rec in self.recs: + t.extend(rec.keys()) + t.sort() + #eliminate the elements repeated + tt = [] + for x in t: + if not x in tt: + tt.append(x) + self.assertEqual(tags,tt) + + def test_fields_created(self): + """bibrecord - fields created""" + ## check if the number of fields for each record is correct + + fields=[13,13, 8, 11, 10,12, 10, 14, 10, 17, 13, 15, 10, 9, 14, 10, 11, 11, 11, 9, 10, 10, 10, 8, 8, 8, 9, 9, 9, 10, 8, 8, 8,8, 14, 13, 14, 14, 15, 12,12, 12,14, 13, 11, 15, 15, 14, 14, 13, 15, 14, 14, 14, 15, 14, 15, 14, 14, 15, 14, 13, 13, 14, 11, 13, 11, 14, 8, 10, 13, 12, 11, 12,6] + + cr=[] + ret=[] + for rec in self.recs: + cr.append(len(rec.values())) + ret.append(rec) + self.assertEqual(fields,cr) + +class BadInputTreatmentTest(unittest.TestCase): + """ bibrecord - testing for bad input treatment """ + + +### check bad input treatment ### + def test_wrong_attribute(self): + """bibrecord - bad input : Has \'cde\' instead \'code\' in a subfield attribute""" + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error1 = """ + + 33 + + eng + + + Doe, John + + + On the foo and bar + + + """ + (rec,st,e) = bibrecord.create_record(xml_error1,1,1) + ee='' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[3])>0: + ee = i + self.assertEqual(bibrecord.warning((3,'(field number: 4)')),ee) + + def test_missing_attribute(self): + """ bibrecord - bad input : Missing attribute \"tag\" """ + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error2 = """ + + 33 + + eng + + + Doe, John + + + On the foo and bar + + + """ + (rec,st,e) = bibrecord.create_record(xml_error2,1,1) + ee='' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[1])>0: + ee = i + self.assertEqual(bibrecord.warning((1,'(field number(s): [2])')),ee) + + def test_empty_datafield(self): + """ bibrecord - bad input : Datafield without any subfield """ + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error3 = """ + + 33 + + + + Doe, John + + + On the foo and bar + + + """ + (rec,st,e) = bibrecord.create_record(xml_error3,1,1) + ee='' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[8])>0: + ee = i + self.assertEqual(bibrecord.warning((8,'(field number: 2)')),ee) + + + def test_missing_tag(self): + """bibrecord - bad input : Missing end \"tag\"""" + ws = bibrecord.cfg_bibrecord_warning_msgs + xml_error4 = """ + + 33 + + eng + + + Doe, John + + + On the foo and bar + + """ + (rec,st,e) = bibrecord.create_record(xml_error4,1,1) + ee = '' + for i in e: + if type(i).__name__ == 'str': + if i.count(ws[99])>0: + ee = i + self.assertEqual(bibrecord.warning((99,'(Tagname : datafield)')),ee) + +def create_test_suite(): + """Return test suite for the bibrecord module""" + return unittest.TestSuite((unittest.makeSuite(SanityTest,'test'), + unittest.makeSuite(SuccessTest,'test'), + unittest.makeSuite(BadInputTreatmentTest,'test'))) +if __name__ == '__main__': + unittest.TextTestRunner(verbosity=2).run(create_test_suite()) +