diff --git a/invenio/legacy/bibrecord/__init__.py b/invenio/legacy/bibrecord/__init__.py index 6406d1765..5e0212fb5 100644 --- a/invenio/legacy/bibrecord/__init__.py +++ b/invenio/legacy/bibrecord/__init__.py @@ -1,2338 +1,2327 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, ## 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ BibRecord - XML MARC processing library for Invenio. BibRecord library offer a whole set of API function to handle record metadata. Managing metadata with BibRecord In order to work with bibrecord library you first need to have available a record representation. 
If you have a MARCXML representation of the record to be handled, you can use the create_record function to obtain a bibrecord internal representation:: from invenio.legacy.bibrecord import create_record record = create_record(marcxml)[0] If you want to handle a record stored in the system and you know the record ID, then you can easily exploit Invenio search_engine API to obtain the corresponding marcxml:: from invenio.legacy.bibrecord import create_record from invenio.legacy.search_engine import print_record marcxml = print_record(rec_id, 'xm') record = create_record(marcxml)[0] Having an internal representation of a record you can manipulate it by means of bibrecord functions like :func:`~invenio.legacy.bibrecord.record_get_field_instances`, :func:`~invenio.legacy.bibrecord.record_has_field`, :func:`~invenio.legacy.bibrecord.record_add_field`, :func:`~invenio.legacy.bibrecord.record_delete_field`, :func:`~invenio.legacy.bibrecord.record_delete_subfield`, :func:`~invenio.legacy.bibrecord.record_add_or_modify_subfield`, :func:`~invenio.legacy.bibrecord.record_add_subfield`, :func:`~invenio.legacy.bibrecord.record_does_field_exist`, :func:`~invenio.legacy.bibrecord.record_filter_fields`, :func:`~invenio.legacy.bibrecord.record_replace_in_subfields`, :func:`~invenio.legacy.bibrecord.record_get_field_value`, :func:`~invenio.legacy.bibrecord.record_get_field_values`... At the end, if you want the MARCXML representation of the record you can use record_xml_output:: from invenio.legacy.bibrecord import create_record from invenio.legacy.search_engine import print_record marcxml = print_record(rec_id, 'xm') record = create_record(marcxml)[0] # ... manipulation ... new_marcxml = record_xml_output(record) In order to write back such a record into the system you should use the BibUpload utility. 
Please refer to bibrecord.py for a complete and up-to-date description of the API; see :func:`~invenio.legacy.bibrecord.create_record`, :func:`~invenio.legacy.bibrecord.record_get_field_instances` and friends in the source code of this file in the section entitled INTERFACE. As always, a good entry point to the bibrecord library and its record structure manipulating functions is to read the unit test cases that are located in bibrecord_tests.py and bibupload_regression_tests.py. """ ### IMPORT INTERESTING MODULES AND XML PARSERS import re import string import sys from six import StringIO if sys.hexversion < 0x2040000: # pylint: disable=W0622 from sets import Set as set # pylint: enable=W0622 from invenio.base.globals import cfg from invenio.legacy.bibrecord.bibrecord_config import CFG_MARC21_DTD, \ CFG_BIBRECORD_WARNING_MSGS, CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL, \ CFG_BIBRECORD_DEFAULT_CORRECT, CFG_BIBRECORD_PARSERS_AVAILABLE, \ InvenioBibRecordParserError, InvenioBibRecordFieldError from invenio.utils.text import encode_for_xml from invenio.legacy.dbquery import run_sql from intbitset import intbitset # Some values used for the RXP parsing. TAG, ATTRS, CHILDREN = 0, 1, 2 # Find out about the best usable parser: AVAILABLE_PARSERS = [] # Do we remove singletons (empty tags)? # NOTE: this is currently set to True as there are some external workflow # exploiting singletons, e.g. bibupload -c used to delete fields, and # bibdocfile --fix-marc called on a record where the latest document # has been deleted.
CFG_BIBRECORD_KEEP_SINGLETONS = True

# Probe each supported XML parser.  A parser name is appended to
# AVAILABLE_PARSERS only when its module can be imported AND the name is
# enabled in CFG_BIBRECORD_PARSERS_AVAILABLE; the append order below is the
# order of the resulting list.
try:
    from lxml import etree
    if 'lxml' in CFG_BIBRECORD_PARSERS_AVAILABLE:
        AVAILABLE_PARSERS.append('lxml')
except ImportError:
    pass

try:
    import pyRXP
    if 'pyrxp' in CFG_BIBRECORD_PARSERS_AVAILABLE:
        AVAILABLE_PARSERS.append('pyrxp')
except ImportError:
    pass

try:
    import Ft.Xml.Domlette
    if '4suite' in CFG_BIBRECORD_PARSERS_AVAILABLE:
        AVAILABLE_PARSERS.append('4suite')
except ImportError:
    pass
except Exception as err:
    # 4suite can fail with errors other than ImportError; report them as a
    # warning instead of breaking the import of this module.
    from warnings import warn
    warn("Error when importing 4suite: %s" % err)

try:
    import xml.dom.minidom
    import xml.parsers.expat
    if 'minidom' in CFG_BIBRECORD_PARSERS_AVAILABLE:
        AVAILABLE_PARSERS.append('minidom')
except ImportError:
    pass


### INTERFACE / VISIBLE FUNCTIONS

def create_field(subfields=None, ind1=' ', ind2=' ', controlfield_value='',
                 global_position=-1):
    """
    Return a field created with the provided elements.

    :param subfields: list of (code, value) tuples; a fresh empty list is
        used when omitted
    :param ind1: the first indicator
    :param ind2: the second indicator
    :param controlfield_value: the value of the controlfield
    :param global_position: the global field position; defaults to the
        arbitrary value -1
    :return: the tuple (subfields, ind1, ind2, controlfield_value,
        global_position), after it went through _check_field_validity()
    """
    if subfields is None:
        subfields = []

    ind1, ind2 = _wash_indicators(ind1, ind2)
    field = (subfields, ind1, ind2, controlfield_value, global_position)
    _check_field_validity(field)
    return field


def create_records(marcxml, verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
                   correct=CFG_BIBRECORD_DEFAULT_CORRECT, parser='',
                   keep_singletons=CFG_BIBRECORD_KEEP_SINGLETONS):
    """
    Create a list of records from the marcxml description.

    The input is split into ``<record ...>...</record>`` chunks and each
    chunk is handed to create_record().

    :param marcxml: a string holding any number of MARCXML records
    :param verbose: forwarded to create_record()
    :param correct: forwarded to create_record()
    :param parser: forwarded to create_record()
    :param keep_singletons: forwarded to create_record()
    :returns: a list of objects initiated by the function create_record().
        Please see that function's docstring.
    """
    # Use the DOTALL flag to include newlines.  The opening tag must be
    # exactly ``record`` (optionally followed by attributes), so that
    # wrapper elements such as ``<records>`` are not mistaken for a record.
    regex = re.compile(r'<record(?:\s.*?)?>.*?</record>', re.DOTALL)
    record_xmls = regex.findall(marcxml)

    return [create_record(record_xml, verbose=verbose, correct=correct,
                          parser=parser, keep_singletons=keep_singletons)
            for record_xml in record_xmls]
def create_record(marcxml, verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL,
                  correct=CFG_BIBRECORD_DEFAULT_CORRECT, parser='',
                  sort_fields_by_indicators=False,
                  keep_singletons=CFG_BIBRECORD_KEEP_SINGLETONS):
    """Create a record object from the marcxml description.

    Uses the parser chosen by _select_parser() for the requested ``parser``
    name.

    The returned object is a tuple (record, status_code, list_of_errors),
    where status_code is 0 when there are errors, 1 when no errors.

    The return record structure is as follows::

        Record := {tag : [Field]}
        Field := (Subfields, ind1, ind2, value, global_position)
        Subfields := [(code, value)]

    For example::

        {'001': [([], ' ', ' ', '12345', 1)],
         '909': [([('a', 'value'),
                   ('a', 'value for another a'),
                   ('b', 'value for subfield b')], 'C', '4', '', 2),
                 ...],
         '520': [...]}

    :param marcxml: an XML string representation of the record to create
    :param verbose: the level of verbosity: 0 (silent), 1-2 (warnings),
        3(strict:stop when errors)
    :param correct: 1 to enable correction of marcxml syntax. Else 0.
    :param parser: name of the parser to use ('pyrxp', 'lxml', '4suite' or
        'minidom'); passed through _select_parser()
    :param sort_fields_by_indicators: when true, the parsed record is passed
        to _record_sort_by_indicators()
    :param keep_singletons: forwarded to the parser backend
    :return: a tuple (record, status_code, list_of_errors), where status
        code is 0 where there are errors, 1 when no errors.  When the parser
        raises InvenioBibRecordParserError the tuple is
        (None, 0, <error message string>).
    """
    # Select the appropriate parser.
    parser = _select_parser(parser)

    try:
        if parser == 'pyrxp':
            rec = _create_record_rxp(marcxml, verbose, correct,
                                     keep_singletons=keep_singletons)
        elif parser == 'lxml':
            rec = _create_record_lxml(marcxml, verbose, correct,
                                      keep_singletons=keep_singletons)
        elif parser == '4suite':
            rec = _create_record_4suite(marcxml,
                                        keep_singletons=keep_singletons)
        elif parser == 'minidom':
            rec = _create_record_minidom(marcxml,
                                         keep_singletons=keep_singletons)
        else:
            # Without this branch ``rec`` would stay unbound and the code
            # below would fail with an UnboundLocalError.
            raise InvenioBibRecordParserError(
                "Unsupported XML parser: %r" % (parser,))
    except InvenioBibRecordParserError as ex1:
        return (None, 0, str(ex1))

    if sort_fields_by_indicators:
        _record_sort_by_indicators(rec)

    errs = []
    if correct:
        # Correct the structure of the record.
        errs = _correct_record(rec)

    return (rec, int(not errs), errs)
def filter_field_instances(field_instances, filter_subcode, filter_value,
                           filter_mode='e'):
    """Filter the given field.

    Filters given field and returns only that field instances that contain
    filter_subcode with given filter_value. As an input for search function
    accepts output from record_get_field_instances function. Function can be
    run in three modes:

    - 'e' - looking for exact match in subfield value
    - 's' - looking for substring in subfield value
    - 'r' - looking for regular expression in subfield value (the pattern
      is applied with ``re.match``, i.e. anchored at the start of the value)

    Any other mode yields an empty list.

    Example:

        filter_field_instances(
            record_get_field_instances(rec, '999', '%', '%'), 'y', '2001')

    In this case filter_subcode is 'y' and filter_value is '2001'.

    :param field_instances: output from record_get_field_instances
    :param filter_subcode: name of the subfield
    :type filter_subcode: string
    :param filter_value: value of the subfield
    :type filter_value: string
    :param filter_mode: 'e','s' or 'r'
    :return: the list of matching field instances, in input order; each
        instance appears at most once
    """
    if filter_mode == 'e':
        to_match = (filter_subcode, filter_value)
        return [instance for instance in field_instances
                if to_match in instance[0]]

    if filter_mode == 's':
        def value_matches(value):
            return filter_value in value
    elif filter_mode == 'r':
        # Compile once, outside of the loops.
        reg_exp = re.compile(filter_value)

        def value_matches(value):
            return reg_exp.match(value) is not None
    else:
        return []

    return [instance for instance in field_instances
            if any(subfield[0] == filter_subcode and
                   value_matches(subfield[1])
                   for subfield in instance[0])]
def record_drop_duplicate_fields(record):
    """
    Return a record where all the duplicate fields have been removed.

    Fields are considered identical considering also the order of their
    subfields.  Two fields of the same tag are duplicates when subfields,
    indicators and controlfield value are all equal; the global position is
    ignored for the comparison.

    :param record: a record structure; it is not modified
    :return: a new record dictionary whose kept fields are renumbered with
        consecutive global positions starting at 1, tags being visited in
        sorted order
    """
    out = {}
    position = 0
    for tag in sorted(record.keys()):
        out[tag] = []
        seen = set()
        for full_field in record[tag]:
            # Lists are not hashable: freeze the subfields into a tuple.
            signature = (tuple(full_field[0]),) + full_field[1:4]
            if signature not in seen:
                seen.add(signature)
                position += 1
                out[tag].append(full_field[:4] + (position,))
    return out


def records_identical(rec1, rec2, skip_005=True, ignore_field_order=False,
                      ignore_subfield_order=False,
                      ignore_duplicate_subfields=False,
                      ignore_duplicate_controlfields=False):
    """
    Return True if rec1 is identical to rec2.

    It does so regardless of a difference in the 005 tag (i.e. the
    timestamp), unless skip_005 is false.

    :param rec1: first record structure
    :param rec2: second record structure
    :param skip_005: ignore the presence and content of tag 005
    :param ignore_field_order: compare the fields of a tag regardless of
        their global positions
    :param ignore_subfield_order: compare the subfields of a field
        regardless of their order
    :param ignore_duplicate_subfields: compare the subfields of a field as
        sets (implies that their order is ignored too)
    :param ignore_duplicate_controlfields: for tags starting with '00',
        compare only the sets of controlfield values
    :return: a boolean
    """
    def normalized(subfields):
        """Return the subfields in the form in which they are compared."""
        if ignore_duplicate_subfields:
            return sorted(set(subfields))
        if ignore_subfield_order:
            return sorted(subfields)
        return subfields

    if ignore_field_order:
        # Sort first by indicators and then by anything else.  The
        # subfields must be normalized here as well, otherwise two equal
        # fields whose subfields are merely ordered differently may be
        # sorted to different slots and be paired with the wrong partner.
        def sort_key(elem):
            return (elem[1], elem[2], elem[3], normalized(elem[0]))
    else:
        # Sort first by indicators, then by global position and then by
        # anything else.
        def sort_key(elem):
            return (elem[1], elem[2], elem[4], elem[3], elem[0])

    rec1_keys = set(rec1.keys())
    rec2_keys = set(rec2.keys())
    if skip_005:
        rec1_keys.discard("005")
        rec2_keys.discard("005")
    if rec1_keys != rec2_keys:
        return False

    for key in rec1_keys:
        if ignore_duplicate_controlfields and key.startswith('00'):
            if set(field[3] for field in rec1[key]) != \
                    set(field[3] for field in rec2[key]):
                return False
            continue

        rec1_fields = rec1[key]
        rec2_fields = rec2[key]
        if len(rec1_fields) != len(rec2_fields):
            # They already differ in length...
            return False

        rec1_fields = sorted(rec1_fields, key=sort_key)
        rec2_fields = sorted(rec2_fields, key=sort_key)
        for field1, field2 in zip(rec1_fields, rec2_fields):
            if field1[1:4] != field2[1:4]:
                return False
            if normalized(field1[0]) != normalized(field2[0]):
                return False
    return True


def record_get_field_instances(rec, tag="", ind1=" ", ind2=" "):
    """
    Return the list of field instances for the specified tag and indications.

    Return empty list if not found.
    If tag is empty string, returns the list of all (tag, fields) pairs of
    the record.

    Parameters (tag, ind1, ind2) can contain wildcard %.

    :param rec: a record structure as returned by create_record()
    :param tag: a 3 characters long string
    :param ind1: a 1 character long string
    :param ind2: a 1 character long string
    :return: a list of field tuples (Subfields, ind1, ind2, value,
             field_position_global) where subfields is list of (code, value)
    """
    if not rec:
        return []
    if not tag:
        # Materialize the items so that a list is returned on Python 3 too.
        return list(rec.items())

    ind1, ind2 = _wash_indicators(ind1, ind2)

    if '%' in tag:
        # Wildcard in tag. Check all possible
        candidate_tags = [field_tag for field_tag in rec
                          if _tag_matches_pattern(field_tag, tag)]
    elif tag in rec:
        # Completely defined tag. Use dict
        candidate_tags = [tag]
    else:
        candidate_tags = []

    return [field
            for field_tag in candidate_tags
            for field in rec[field_tag]
            if ind1 in ('%', field[1]) and ind2 in ('%', field[2])]
def record_add_field(rec, tag, ind1=' ', ind2=' ', controlfield_value='',
                     subfields=None, field_position_global=None,
                     field_position_local=None):
    """
    Add a new field into the record.

    If field_position_global or field_position_local is specified then
    this method will insert the new field at the desired position.
    Otherwise a global field position will be computed in order to
    insert the field at the best position (first we try to keep the
    order of the tags and then we insert the field at the end of the
    fields with the same tag).

    If both field_position_global and field_position_local are present,
    then field_position_local takes precedence.

    :param rec: the record data structure
    :param tag: the tag of the field to be added
    :param ind1: the first indicator
    :param ind2: the second indicator
    :param controlfield_value: the value of the controlfield
    :param subfields: the subfields (a list of tuples (code, value))
    :param field_position_global: the global field position (record wise)
    :param field_position_local: the local field position (tag wise)
    :return: the global field position of the newly inserted field or -1 if
             the operation failed (a controlfield value combined with
             indicators or subfields)
    """
    error = _validate_record_field_positions_global(rec)
    if error:
        # FIXME one should write a message here
        pass

    # Clean the parameters.
    if subfields is None:
        subfields = []
    ind1, ind2 = _wash_indicators(ind1, ind2)

    # A controlfield carries neither indicators nor subfields.
    if controlfield_value and (ind1 != ' ' or ind2 != ' ' or subfields):
        return -1

    # Detect field number to be used for insertion:
    # Dictionaries for uniqueness.
    tag_field_positions_global = {}.fromkeys([field[4]
                                              for field in rec.get(tag, [])])
    all_field_positions_global = {}.fromkeys([field[4]
                                              for fields in rec.values()
                                              for field in fields])

    if field_position_global is None and field_position_local is None:
        # Let's determine the global field position of the new field.
        if tag in rec:
            try:
                field_position_global = max([field[4] for field in rec[tag]]) \
                    + 1
            except (IndexError, ValueError):
                # max() raises ValueError on an empty sequence, which
                # happens when the tag is present with an empty field list.
                if tag_field_positions_global:
                    field_position_global = max(tag_field_positions_global) + 1
                elif all_field_positions_global:
                    field_position_global = max(all_field_positions_global) + 1
                else:
                    field_position_global = 1
        else:
            if tag in ('FMT', 'FFT', 'BDR', 'BDM'):
                # Add the new tag to the end of the record.
                if tag_field_positions_global:
                    field_position_global = max(tag_field_positions_global) + 1
                elif all_field_positions_global:
                    field_position_global = max(all_field_positions_global) + 1
                else:
                    field_position_global = 1
            else:
                # Insert the tag in an ordered way by selecting the
                # right global field position.
                immediate_lower_tag = '000'
                for rec_tag in rec:
                    if immediate_lower_tag < rec_tag < tag:
                        immediate_lower_tag = rec_tag

                if immediate_lower_tag == '000':
                    field_position_global = 1
                else:
                    field_position_global = rec[immediate_lower_tag][-1][4] + 1

        field_position_local = len(rec.get(tag, []))
        _shift_field_positions_global(rec, field_position_global, 1)
    elif field_position_local is not None:
        if tag in rec:
            if field_position_local >= len(rec[tag]):
                field_position_global = rec[tag][-1][4] + 1
            else:
                field_position_global = rec[tag][field_position_local][4]
            _shift_field_positions_global(rec, field_position_global, 1)
        else:
            if all_field_positions_global:
                field_position_global = max(all_field_positions_global) + 1
            else:
                # Empty record.
                field_position_global = 1
    elif field_position_global is not None:
        # If the user chose an existing global field position, shift all the
        # global field positions greater than the input global field position.
        if tag not in rec:
            if all_field_positions_global:
                field_position_global = max(all_field_positions_global) + 1
            else:
                field_position_global = 1
            field_position_local = 0
        elif field_position_global < min(tag_field_positions_global):
            field_position_global = min(tag_field_positions_global)
            _shift_field_positions_global(rec, min(tag_field_positions_global),
                                          1)
            field_position_local = 0
        elif field_position_global > max(tag_field_positions_global):
            field_position_global = max(tag_field_positions_global) + 1
            _shift_field_positions_global(rec,
                                          max(tag_field_positions_global) + 1,
                                          1)
            field_position_local = len(rec.get(tag, []))
        else:
            if field_position_global in tag_field_positions_global:
                _shift_field_positions_global(rec, field_position_global, 1)

            # The field that used to sit at the requested position (if any)
            # has just been shifted by one: the new field goes right before
            # it.  When no field of this tag sat there, the local position
            # defaults to 0.
            field_position_local = 0
            for position, field in enumerate(rec[tag]):
                if field[4] == field_position_global + 1:
                    field_position_local = position

    # Create the new field.
    newfield = (subfields, ind1, ind2, str(controlfield_value),
                field_position_global)
    rec.setdefault(tag, []).insert(field_position_local, newfield)

    # Return new field number:
    return field_position_global
def record_has_field(rec, tag):
    """
    Check if the tag exists in the record.

    :param rec: the record data structure
    :param tag: the tag to look for
    :return: a boolean
    """
    return tag in rec


def record_delete_field(rec, tag, ind1=' ', ind2=' ',
                        field_position_global=None, field_position_local=None):
    """
    Delete the field with the given position.

    If global field position is specified, deletes the field with the
    corresponding global field position.
    If field_position_local is specified, deletes the field with the
    corresponding local field position and tag.
    Else deletes all the fields matching tag and optionally ind1 and
    ind2.

    If both field_position_global and field_position_local are present,
    then field_position_global takes precedence.

    :param rec: the record data structure
    :param tag: the tag of the field to be deleted
    :param ind1: the first indicator of the field to be deleted
    :param ind2: the second indicator of the field to be deleted
    :param field_position_global: the global field position (record wise)
    :param field_position_local: the local field position (tag wise)
    :return: the list of deleted fields (empty when nothing was deleted)
    """
    error = _validate_record_field_positions_global(rec)
    if error:
        # FIXME one should write a message here.
        pass

    if tag not in rec:
        # Nothing to delete: answer with an empty list, like the other
        # branches do.
        return []

    ind1, ind2 = _wash_indicators(ind1, ind2)

    deleted = []
    newfields = []

    if field_position_global is None and field_position_local is None:
        # Remove all fields with tag 'tag'.
        for field in rec[tag]:
            if field[1] != ind1 or field[2] != ind2:
                newfields.append(field)
            else:
                deleted.append(field)
        rec[tag] = newfields
    elif field_position_global is not None:
        # Remove the field with 'field_position_global'.
        # NOTE(review): because 'and' binds tighter than 'or', a field at
        # the requested position is kept only when BOTH of its indicators
        # differ from ind1/ind2.  Kept as is; confirm the intended rule.
        for field in rec[tag]:
            if (field[1] != ind1 and field[2] != ind2 or
                    field[4] != field_position_global):
                newfields.append(field)
            else:
                deleted.append(field)
        rec[tag] = newfields
    elif field_position_local is not None:
        # Remove the field with 'field_position_local'.
        try:
            deleted.append(rec[tag].pop(field_position_local))
        except IndexError:
            return []

    if not rec[tag]:
        # Tag is now empty, remove it.
        del rec[tag]

    return deleted
def record_delete_fields(rec, tag, field_positions_local=None):
    """
    Delete all/some fields defined with MARC tag 'tag' from record 'rec'.

    :param rec: a record structure.
    :type rec: dict
    :param tag: three letter field.
    :type tag: string
    :param field_positions_local: if set, it is the list of local positions
        within all the fields with the specified tag, that should be deleted.
        If not set all the fields with the specified tag will be deleted.
    :type field_positions_local: sequence
    :return: the list of deleted fields.
    :rtype: list
    :note: the record is modified in place.
    """
    if tag not in rec:
        return []

    new_fields, deleted_fields = [], []
    for position, field in enumerate(rec[tag]):
        if field_positions_local is None or position in field_positions_local:
            deleted_fields.append(field)
        else:
            new_fields.append(field)

    if new_fields:
        rec[tag] = new_fields
    else:
        # No field is left for this tag: drop the tag altogether.
        del rec[tag]

    return deleted_fields


def record_add_fields(rec, tag, fields, field_position_local=None,
                      field_position_global=None):
    """
    Add the fields into the record at the required position.

    The position is specified by the tag and the field_position_local in the
    list of fields.

    :param rec: a record structure
    :param tag: the tag of the fields to be added
    :param fields: list of fields to be added; it is left untouched
    :param field_position_local: the field_position_local to which the field
                                 will be inserted. If not specified, appends
                                 the fields to the tag.
    :param field_position_global: the global field position handed over to
                                  record_add_field() for every field
    :return: the field_position_local that was passed in
    """
    if field_position_local is None and field_position_global is None:
        for field in fields:
            record_add_field(
                rec, tag, ind1=field[1],
                ind2=field[2], subfields=field[0],
                controlfield_value=field[3])
    else:
        # Every field is inserted at the same position, hence they are
        # added last-to-first to end up in their original order.  Iterate
        # over a reversed view instead of reversing the caller's list.
        for field in reversed(fields):
            record_add_field(
                rec, tag, ind1=field[1], ind2=field[2],
                subfields=field[0], controlfield_value=field[3],
                field_position_local=field_position_local,
                field_position_global=field_position_global)

    return field_position_local
def record_move_fields(rec, tag, field_positions_local,
                       field_position_local=None):
    """
    Move some fields to the position specified by 'field_position_local'.

    :param rec: a record structure as returned by create_record()
    :param tag: the tag of the fields to be moved
    :param field_positions_local: the positions of the fields to move
    :param field_position_local: insert the field before that
                                 field_position_local. If unspecified, appends
                                 the fields
    :return: the value returned by record_add_fields(), i.e. the
             field_position_local that was passed in
    """
    fields = record_delete_fields(
        rec, tag,
        field_positions_local=field_positions_local)
    return record_add_fields(
        rec, tag, fields,
        field_position_local=field_position_local)


def record_delete_subfield(rec, tag, subfield_code, ind1=' ', ind2=' '):
    """Delete all subfields with subfield_code in the record.

    Only the fields of 'tag' whose indicators equal ind1 and ind2 are
    touched; their subfield lists are modified in place.
    """
    ind1, ind2 = _wash_indicators(ind1, ind2)

    for field in rec.get(tag, []):
        if field[1] == ind1 and field[2] == ind2:
            field[0][:] = [subfield for subfield in field[0]
                           if subfield_code != subfield[0]]


def record_get_field(rec, tag, field_position_global=None,
                     field_position_local=None):
    """
    Return the matching field.

    One has to enter either a global field position or a local field position.

    :param rec: the record data structure
    :param tag: the tag of the field
    :param field_position_global: the global field position (record wise)
    :param field_position_local: the local field position (tag wise)
    :return: the field found in the record, as stored under rec[tag]
    :raise InvenioBibRecordFieldError: when no position or both positions
        are given, or when no such tag/field exists
    """
    if field_position_global is None and field_position_local is None:
        raise InvenioBibRecordFieldError(
            "A field position is required to "
            "complete this operation.")
    elif field_position_global is not None and \
            field_position_local is not None:
        raise InvenioBibRecordFieldError(
            "Only one field position is required "
            "to complete this operation.")
    elif field_position_global is not None:
        # Compare against None: 0 is a legitimate value and must not be
        # mistaken for "no global position given".
        if tag not in rec:
            raise InvenioBibRecordFieldError("No tag '%s' in record." % tag)

        for field in rec[tag]:
            if field[4] == field_position_global:
                return field
        raise InvenioBibRecordFieldError(
            "No field has the tag '%s' and the "
            "global field position '%d'." % (tag, field_position_global))
    else:
        try:
            return rec[tag][field_position_local]
        except KeyError:
            raise InvenioBibRecordFieldError("No tag '%s' in record." % tag)
        except IndexError:
            raise InvenioBibRecordFieldError(
                "No field has the tag '%s' and "
                "the local field position '%d'." % (tag, field_position_local))
def record_replace_field(rec, tag, new_field, field_position_global=None,
                         field_position_local=None):
    """Replace a field with a new field.

    One has to enter either a global field position or a local field position.

    :param rec: the record data structure (modified in place)
    :param tag: the tag of the field to be replaced
    :param new_field: the field stored in place of the old one
    :param field_position_global: the global field position (record wise)
    :param field_position_local: the local field position (tag wise)
    :raise InvenioBibRecordFieldError: when no position or both positions
        are given, or when no such tag/field exists
    """
    if field_position_global is None and field_position_local is None:
        raise InvenioBibRecordFieldError(
            "A field position is required to "
            "complete this operation.")
    elif field_position_global is not None and \
            field_position_local is not None:
        raise InvenioBibRecordFieldError(
            "Only one field position is required "
            "to complete this operation.")
    elif field_position_global is not None:
        # Compare against None: 0 is a legitimate value and must not be
        # mistaken for "no global position given".
        if tag not in rec:
            raise InvenioBibRecordFieldError("No tag '%s' in record." % tag)

        replaced = False
        for position, field in enumerate(rec[tag]):
            if field[4] == field_position_global:
                rec[tag][position] = new_field
                replaced = True

        if not replaced:
            raise InvenioBibRecordFieldError(
                "No field has the tag '%s' and "
                "the global field position '%d'." %
                (tag, field_position_global))
    else:
        try:
            rec[tag][field_position_local] = new_field
        except KeyError:
            raise InvenioBibRecordFieldError("No tag '%s' in record." % tag)
        except IndexError:
            raise InvenioBibRecordFieldError(
                "No field has the tag '%s' and "
                "the local field position '%d'." % (tag, field_position_local))
def record_get_subfields(rec, tag, field_position_global=None,
                         field_position_local=None):
    """
    Return the subfield of the matching field.

    One has to enter either a global field position or a local field position.

    :return: a list of subfield tuples (subfield code, value).
    :rtype: list
    """
    field = record_get_field(
        rec, tag,
        field_position_global=field_position_global,
        field_position_local=field_position_local)

    return field[0]


def record_delete_subfield_from(rec, tag, subfield_position,
                                field_position_global=None,
                                field_position_local=None):
    """
    Delete subfield from position specified.

    Specify the subfield by tag, field number and subfield position.
    When the field is left without subfields it is removed from the record,
    and so is the tag once it has no fields left.

    :raise InvenioBibRecordFieldError: when there is no subfield at
        subfield_position
    """
    subfields = record_get_subfields(
        rec, tag,
        field_position_global=field_position_global,
        field_position_local=field_position_local)

    try:
        del subfields[subfield_position]
    except IndexError:
        # Imported lazily: only needed to render the record in the message.
        from .scripts.xmlmarc2textmarc import create_marc_record
        recordMarc = create_marc_record(rec, 0,
                                        {"text-marc": 1, "aleph-marc": 0})
        raise InvenioBibRecordFieldError(
            "The record : %(recordCode)s does not contain the subfield "
            "'%(subfieldIndex)s' inside the field (local: "
            "'%(fieldIndexLocal)s, global: '%(fieldIndexGlobal)s' ) of tag "
            "'%(tag)s'." %
            {"subfieldIndex": subfield_position,
             "fieldIndexLocal": str(field_position_local),
             "fieldIndexGlobal": str(field_position_global),
             "tag": tag,
             "recordCode": recordMarc})

    if not subfields:
        if field_position_global is not None:
            # Filter in place rather than deleting while enumerating the
            # very same list, which would skip the element following each
            # deletion.
            rec[tag][:] = [field for field in rec[tag]
                           if field[4] != field_position_global]
        else:
            del rec[tag][field_position_local]

        if not rec[tag]:
            del rec[tag]


def record_add_subfield_into(rec, tag, subfield_code, value,
                             subfield_position=None,
                             field_position_global=None,
                             field_position_local=None):
    """Add subfield into specified position.

    Specify the subfield by tag, field number and optionally by subfield
    position.  Without a subfield position the subfield is appended.
    """
    subfields = record_get_subfields(
        rec, tag,
        field_position_global=field_position_global,
        field_position_local=field_position_local)

    if subfield_position is None:
        subfields.append((subfield_code, value))
    else:
        subfields.insert(subfield_position, (subfield_code, value))
def record_modify_controlfield(rec, tag, controlfield_value,
                               field_position_global=None,
                               field_position_local=None):
    """Modify controlfield at position specified by tag and field number."""
    position_kwargs = {
        'field_position_global': field_position_global,
        'field_position_local': field_position_local,
    }
    current = record_get_field(rec, tag, **position_kwargs)

    # Same field, except for the controlfield value.
    updated = (current[0], current[1], current[2], controlfield_value,
               current[4])

    record_replace_field(rec, tag, updated, **position_kwargs)


def record_modify_subfield(rec, tag, subfield_code, value, subfield_position,
                           field_position_global=None,
                           field_position_local=None):
    """Modify subfield at specified position.

    Specify the subfield by tag, field number and subfield position.
    """
    target = record_get_subfields(
        rec, tag,
        field_position_global=field_position_global,
        field_position_local=field_position_local)

    replacement = (subfield_code, value)
    try:
        target[subfield_position] = replacement
    except IndexError:
        raise InvenioBibRecordFieldError(
            "There is no subfield with position '%d'." % subfield_position)


def record_move_subfield(rec, tag, subfield_position, new_subfield_position,
                         field_position_global=None,
                         field_position_local=None):
    """Move subfield at specified position.

    Specify the subfield by tag, field number and subfield position to new
    subfield position.
    """
    target = record_get_subfields(
        rec,
        tag,
        field_position_global=field_position_global,
        field_position_local=field_position_local)

    try:
        moved = target.pop(subfield_position)
        target.insert(new_subfield_position, moved)
    except IndexError:
        raise InvenioBibRecordFieldError(
            "There is no subfield with position '%d'." % subfield_position)


def record_get_field_value(rec, tag, ind1=" ", ind2=" ", code=""):
    """Return first (string) value that matches specified field of the record.

    Returns empty string if not found.

    Parameters (tag, ind1, ind2, code) can contain wildcard %.

    Difference between wildcard % and empty '':

    - Empty char specifies that we are not interested in a field which
      has one of the indicator(s)/subfield specified.

    - Wildcard specifies that we are interested in getting the value
      of the field whatever the indicator(s)/subfield is.

    For e.g. consider the following record in MARC::

        100C5  $$a val1
        555AB  $$a val2
        555AB      val3
        555    $$a val4
        555A       val5

    .. doctest::

        >>> record_get_field_value(record, '555', 'A', '', '')
        "val5"
        >>> record_get_field_value(record, '555', 'A', '%', '')
        "val3"
        >>> record_get_field_value(record, '555', 'A', '%', '%')
        "val2"
        >>> record_get_field_value(record, '555', 'A', 'B', '')
        "val3"
        >>> record_get_field_value(record, '555', '', 'B', 'a')
        ""
        >>> record_get_field_value(record, '555', '', '', 'a')
        "val4"
        >>> record_get_field_value(record, '555', '', '', '')
        ""
        >>> record_get_field_value(record, '%%%', '%', '%', '%')
        "val1"

    :param rec: a record structure as returned by create_record()
    :param tag: a 3 characters long string
    :param ind1: a 1 character long string
    :param ind2: a 1 character long string
    :param code: a 1 character long string
    :return: string value (empty if nothing found)
    """
    ind1, ind2 = _wash_indicators(ind1, ind2)

    if '%' in tag:
        # Wild card in tag: walk through the fields of every matching tag.
        candidates = (field
                      for field_tag, fields in rec.items()
                      if _tag_matches_pattern(field_tag, tag)
                      for field in fields)
    elif tag in rec:
        # Tag is completely specified: use it as dict key.
        candidates = iter(rec[tag])
    else:
        return ""

    for field in candidates:
        if not (ind1 in ('%', field[1]) and ind2 in ('%', field[2])):
            continue

        if code == '':
            # No subfield wanted: first non-empty field value wins.
            if field[3]:
                return field[3]
        elif code == '%':
            # Any subfield: first subfield of the first field having some.
            if field[0]:
                return field[0][0][1]
        else:
            # A given subfield: first one carrying the requested code.
            for subfield in field[0]:
                if subfield[0] == code:
                    return subfield[1]

    # Nothing was found
    return ""
def record_get_field_values(rec, tag, ind1=" ", ind2=" ", code="",
                            filter_subfield_code="",
                            filter_subfield_value="",
                            filter_subfield_mode="e"):
    """Return the list of values for the specified field of the record.

    List can be filtered. Use filter_subfield_code
    and filter_subfield_value to search
    only in fields that have these values inside them as a subfield.
    filter_subfield_mode can have 3 different values:
    'e' for exact search
    's' for substring search
    'r' for regexp search (applied with ``re.match``)

    The filter is only honoured when subfield values are requested, i.e.
    when code is not empty.

    Returns empty list if nothing was found.

    Parameters (tag, ind1, ind2, code) can contain wildcard %.

    :param rec: a record structure as returned by create_record()
    :param tag: a 3 characters long string
    :param ind1: a 1 character long string
    :param ind2: a 1 character long string
    :param code: a 1 character long string
    :param filter_subfield_code: code of the subfield a field must hold to
        be considered
    :param filter_subfield_value: value (or pattern) that subfield is
        checked against
    :param filter_subfield_mode: 'e', 's' or 'r'
    :return: a list of strings
    """
    ind1, ind2 = _wash_indicators(ind1, ind2)

    if filter_subfield_code and filter_subfield_mode == "r":
        reg_exp = re.compile(filter_subfield_value)

    def field_passes_filter(subfields):
        """Tell whether a field with these subfields satisfies the filter."""
        if not filter_subfield_code:
            return True
        if filter_subfield_mode == "e":
            return (filter_subfield_code, filter_subfield_value) in subfields
        if filter_subfield_mode not in ("s", "r"):
            return False
        # Look at EVERY occurrence of the filter subfield: going through
        # dict(subfields) would only see the last value of a repeated code.
        # A field without that subfield is tested against the empty string.
        candidates = [value for subfield_code, value in subfields
                      if subfield_code == filter_subfield_code] or ['']
        if filter_subfield_mode == "s":
            return any(filter_subfield_value in value
                       for value in candidates)
        return any(reg_exp.match(value) for value in candidates)

    if '%' in tag:
        # Wild card in tag. Must find all corresponding tags and fields
        tags = [k for k in rec if _tag_matches_pattern(k, tag)]
    elif rec and tag in rec:
        tags = [tag]
    else:
        tags = []

    values = []
    for current_tag in tags:
        for field in rec[current_tag]:
            if not (ind1 in ('%', field[1]) and ind2 in ('%', field[2])):
                continue

            if code == '':
                # Code not specified. Consider field value (without
                # subfields)
                if field[3]:
                    values.append(field[3])
            elif field_passes_filter(field[0]):
                # Code is wildcard: consider all subfields.  Otherwise
                # consider the subfields carrying the requested code.
                values.extend(value for subfield_code, value in field[0]
                              if code == '%' or subfield_code == code)

    # If nothing was collected, nothing was found
    return values
Consider all corresponding subfields for tag in tags: for field in rec[tag]: if ind1 in ('%', field[1]) and ind2 in ('%', field[2]): if filter_subfield_code: if filter_subfield_mode == "e": subfield_to_match = (filter_subfield_code, filter_subfield_value) if subfield_to_match in field[0]: for subfield in field[0]: if subfield[0] == code: tmp.append(subfield[1]) elif filter_subfield_mode == "s": if (dict(field[0]).get(filter_subfield_code, '')) \ .find(filter_subfield_value) > -1: for subfield in field[0]: if subfield[0] == code: tmp.append(subfield[1]) elif filter_subfield_mode == "r": if reg_exp.match(dict(field[0]) .get(filter_subfield_code, '')): for subfield in field[0]: if subfield[0] == code: tmp.append(subfield[1]) else: for subfield in field[0]: if subfield[0] == code: tmp.append(subfield[1]) # If tmp was not set, nothing was found return tmp def record_xml_output(rec, tags=None, order_fn=None): """Generate the XML for record 'rec'. :param rec: record :param tags: list of tags to be printed :return: string """ if tags is None: tags = [] if isinstance(tags, str): tags = [tags] if tags and '001' not in tags: # Add the missing controlfield. 
def record_order_fields(rec, fun="_order_by_ord"):
    """Sort the list 'rec' in place according to a comparison function.

    :param rec: the list to sort; it is modified in place
    :param fun: either a two-argument comparison function returning a
        negative, zero or positive number, or the name of such a function
        defined at module level (for instance "_order_by_ord" or
        "_order_by_tags")
    :raise NameError: if 'fun' is a name that does not designate a callable
        defined in this module
    """
    from functools import cmp_to_key

    if callable(fun):
        comparator = fun
    else:
        # Resolve the name with a plain lookup instead of eval(): eval()
        # would execute whatever expression is passed in through 'fun'.
        comparator = globals().get(fun)
        if not callable(comparator):
            # Same exception type the former eval()-based lookup raised.
            raise NameError("name %r is not defined" % (fun,))
    rec.sort(key=cmp_to_key(comparator))
def print_recs(listofrec, format=1, tags=None):
    """
    Print a list of records.

    :param listofrec: list of record structures
    :param format: 1 XML, 2 HTML (not implemented)
    :param tags: list of tags to be printed
    :return: the printed records, each one preceded by a newline; the empty
        string if 'listofrec' is not a list (or is an empty list)
    """
    if tags is None:
        tags = []
    if not isinstance(listofrec, list):
        return ""
    # Build the output in one pass instead of re-copying the accumulated
    # text for every record.
    return "".join(["\n%s" % print_rec(rec, format, tags)
                    for rec in listofrec])
def record_match_subfields(rec, tag, ind1=" ", ind2=" ", sub_key=None,
                           sub_value='', sub_key2=None, sub_value2='',
                           case_sensitive=True):
    """
    Find a field instance whose subfields satisfy one of three tests.

    - Does a subfield code exist? (ie does 773__a exist?)
    - Does a subfield have a particular value? (ie 773__a == 'PhysX')
    - Do a pair of subfields have particular values?
      (ie 035__2 == 'CDS' and 035__a == '123456')

    When a subfield code is repeated inside a field, only its last value is
    considered (the subfields are turned into a dictionary).

    :param rec: a bibrecord structure
    :type rec: dictionary
    :param tag: the tag of the field (ie '773')
    :type tag: string
    :param ind1: first MARC indicator, a single character
    :param ind2: second MARC indicator, a single character
    :param sub_key: subfield key to find
    :param sub_value: expected value of that key; '' only tests existence
    :param sub_key2: key of subfield to compare against
    :param sub_value2: expected value of second subfield
    :param case_sensitive: be case sensitive when matching values
    :type case_sensitive: boolean
    :return: False if no match found, else the global position (int) of the
        first matching field
    :raise TypeError: if sub_key is None, or if sub_key2 is given without
        a sub_value2
    """
    if sub_key is None:
        raise TypeError("None object passed for parameter sub_key.")
    # Compare by value: identity with a string literal only holds by
    # accident of interning.
    if sub_key2 is not None and sub_value2 == '':
        raise TypeError("Parameter sub_key2 defined but sub_value2 is None, "
                        "function requires a value for comparrison.")
    ind1, ind2 = _wash_indicators(ind1, ind2)

    if not case_sensitive:
        sub_value = sub_value.lower()
        sub_value2 = sub_value2.lower()

    for field in record_get_field_instances(rec, tag, ind1, ind2):
        subfields = dict(field_get_subfield_instances(field))
        if not case_sensitive:
            subfields = dict((key, value.lower())
                             for key, value in subfields.items())

        if sub_key not in subfields:
            continue
        if sub_value == '':
            # Existence of the subfield is enough.
            return field[4]
        if sub_value != subfields[sub_key]:
            continue
        if sub_key2 is None:
            return field[4]
        if sub_key2 in subfields and sub_value2 == subfields[sub_key2]:
            return field[4]
    return False
def record_empty(rec):
    """
    Tell whether the record holds nothing besides tags 001 and 005.

    :param rec: a record structure (dictionary keyed by tag)
    :type rec: dictionary
    :return: True if no tag other than '001' and '005' is present (an empty
        dictionary therefore counts as empty), False otherwise
    :rtype: boolean
    """
    # Iterating the dictionary yields its keys on every Python version.
    return all(key in ('001', '005') for key in rec)
% type(field[1])) if type(field[2]) is not str: raise InvenioBibRecordFieldError( "Indicator 2 of type '%s' should be " "a string." % type(field[2])) if type(field[3]) is not str: raise InvenioBibRecordFieldError( "Controlfield value of type '%s' " "should be a string." % type(field[3])) if type(field[4]) is not int: raise InvenioBibRecordFieldError( "Global position of type '%s' should " "be an int." % type(field[4])) for subfield in field[0]: if (type(subfield) not in (list, tuple) or len(subfield) != 2 or type(subfield[0]) is not str or type(subfield[1]) is not str): raise InvenioBibRecordFieldError( "Subfields are malformed. " "Should a list of tuples of 2 strings.") def _shift_field_positions_global(record, start, delta=1): """ Shift all global field positions. Shift all global field positions with global field positions higher or equal to 'start' from the value 'delta'. """ if not delta: return for tag, fields in record.items(): newfields = [] for field in fields: if field[4] < start: newfields.append(field) else: # Increment the global field position by delta. newfields.append(tuple(list(field[:4]) + [field[4] + delta])) record[tag] = newfields def _tag_matches_pattern(tag, pattern): """Return true if MARC 'tag' matches a 'pattern'. 'pattern' is plain text, with % as wildcard Both parameters must be 3 characters long strings. .. doctest:: >>> _tag_matches_pattern("909", "909") True >>> _tag_matches_pattern("909", "9%9") True >>> _tag_matches_pattern("909", "9%8") False :param tag: a 3 characters long string :param pattern: a 3 characters long string :return: False or True """ for char1, char2 in zip(tag, pattern): if char2 not in ('%', char1): return False return True def _validate_record_field_positions_global(record): """ Check if the global field positions in the record are valid. I.e., no duplicate global field positions and local field positions in the list of fields are ascending. 
:param record: the record data structure :return: the first error found as a string or None if no error was found """ all_fields = [] for tag, fields in record.items(): previous_field_position_global = -1 for field in fields: if field[4] < previous_field_position_global: return ("Non ascending global field positions in tag '%s'." % tag) previous_field_position_global = field[4] if field[4] in all_fields: return ("Duplicate global field position '%d' in tag '%s'" % (field[4], tag)) def get_fieldvalues(recIDs, tag, repetitive_values=True, sort=True, split_by=0): """ Return list of field values for field TAG for the given record. Record can be ID or list of record IDs. (RECIDS can be both an integer or a list of integers.) If REPETITIVE_VALUES is set to True, then return all values even if they are doubled. If set to False, then return unique values only. """ out = [] try: recIDs = int(recIDs) except: pass if isinstance(recIDs, (int, long)): recIDs = [recIDs] if not isinstance(recIDs, (list, tuple, intbitset)): return [] if len(recIDs) == 0: return [] if tag == "001___": # We have asked for tag 001 (=recID) that is not stored in bibXXx # tables. 
def _alephseq_tag_patterns(tags_in):
    """Return the sorted list of tag patterns needed to cover TAGS_IN.

    An empty list of tags (or an empty tag) expands to every two-digit
    prefix '00%' ... '99%'; a one-character tag expands to its ten two-digit
    prefixes; tags of 2 to 4 characters get a trailing '%'; longer tags are
    cut down to their first 5 characters.
    """
    every_prefix = ["%d%d%%" % (i, j) for i in range(0, 10)
                    for j in range(0, 10)]
    if not tags_in:
        return sorted(every_prefix)
    patterns = set()
    for tag in tags_in:
        if len(tag) == 0:
            patterns.update(every_prefix)
        elif len(tag) == 1:
            patterns.update(["%s%d%%" % (tag, j) for j in range(0, 10)])
        elif len(tag) < 5:
            patterns.add("%s%%" % tag)
        else:
            # 5 characters or more: keep tag and indicators only.
            patterns.add(tag[0:5])
    return sorted(patterns)


def get_fieldvalues_alephseq_like(recID, tags_in, can_see_hidden=False):
    """
    Return buffer of ALEPH sequential-like textual format.

    Return buffer of ALEPH sequential-like textual format with fields found
    in the list TAGS_IN for record RECID.

    If can_see_hidden is True, just print everything.  Otherwise hide fields
    from CFG_BIBFORMAT_HIDDEN_TAGS.

    :param recID: the record identifier
    :param tags_in: a tag, or a list of tags
    :param can_see_hidden: whether fields whose tag starts with one of the
        CFG_BIBFORMAT_HIDDEN_TAGS values may be printed
    :return: string
    """
    if not isinstance(tags_in, list):
        tags_in = [tags_in]
    if len(tags_in) == 1 and len(tags_in[0]) == 6:
        ## case A: one concrete subfield asked, so print its value if found
        ## (use with care: can mislead if field has multiple
        ## occurrences)
        return "\n".join(get_fieldvalues(recID, tags_in[0]))

    ## case B: print our "text MARC" format; works safely all the time
    out = ""
    if can_see_hidden:
        hidden_tags = []
    else:
        hidden_tags = cfg['CFG_BIBFORMAT_HIDDEN_TAGS']
    # search all bibXXx tables as needed:
    for tag in _alephseq_tag_patterns(tags_in):
        digits = tag[0:2]
        try:
            intdigits = int(digits)
            if intdigits < 0 or intdigits > 99:
                raise ValueError
        except ValueError:
            # invalid tag value asked for
            continue
        if tag.startswith(("001", "00%")):
            # The record ID is not stored in the bibXXx tables.
            if out:
                out += "\n"
            out += "%09d %s %d" % (recID, "001__", recID)
        bx = "bib%sx" % digits
        bibx = "bibrec_bib%sx" % digits
        query = "SELECT b.tag,b.value,bb.field_number FROM %s AS b, %s " \
                "AS bb WHERE bb.id_bibrec=%%s AND b.id=bb.id_bibxxx AND" \
                " b.tag LIKE %%s ORDER BY bb.field_number, b.tag ASC" % \
                (bx, bibx)
        res = run_sql(query, (recID, str(tag) + '%'))
        # go through fields:
        field_number_old = -999
        field_old = ""
        for row in res:
            field, value, field_number = row[0], row[1], row[2]
            # check the stuff in hiddenfields
            hidden = False
            for htag in hidden_tags:
                if field[0:len(htag)] == htag:
                    hidden = True
                    break
            if hidden:
                continue
            # print field tag
            if field_number != field_number_old or \
                    field[:-1] != field_old[:-1]:
                if out:
                    out += "\n"
                out += "%09d %s " % (recID, field[:5])
                field_number_old = field_number
                field_old = field
            # print subfield value
            if field[0:2] == "00" and field[-1:] == "_":
                out += value
            else:
                out += "$$%s%s" % (field[-1:], value)
    return out
""" parser = etree.XMLParser(dtd_validation=correct, recover=(verbose <= 3)) if correct: marcxml = '<?xml version="1.0" encoding="UTF-8"?>\n' \ '<!DOCTYPE collection SYSTEM "file://%s">\n' \ '<collection>\n%s\n</collection>' % (CFG_MARC21_DTD, marcxml) try: tree = etree.parse(StringIO(marcxml), parser) # parser errors are located in parser.error_log # if 1 <= verbose <=3 then show them to the user? # if verbose == 0 then continue # if verbose >3 then an exception will be thrown except Exception as e: raise InvenioBibRecordParserError(str(e)) record = {} field_position_global = 0 controlfield_iterator = tree.iter(tag='controlfield') for controlfield in controlfield_iterator: tag = controlfield.attrib.get('tag', '!').encode("UTF-8") ind1 = ' ' ind2 = ' ' text = controlfield.text if text is None: text = '' else: text = text.encode("UTF-8") subfields = [] if text or keep_singletons: field_position_global += 1 record.setdefault(tag, []).append((subfields, ind1, ind2, text, field_position_global)) datafield_iterator = tree.iter(tag='datafield') for datafield in datafield_iterator: tag = datafield.attrib.get('tag', '!').encode("UTF-8") ind1 = datafield.attrib.get('ind1', '!').encode("UTF-8") ind2 = datafield.attrib.get('ind2', '!').encode("UTF-8") #ind1, ind2 = _wash_indicators(ind1, ind2) if ind1 in ('', '_'): ind1 = ' ' if ind2 in ('', '_'): ind2 = ' ' subfields = [] subfield_iterator = datafield.iter(tag='subfield') for subfield in subfield_iterator: code = subfield.attrib.get('code', '!').encode("UTF-8") text = subfield.text if text is None: text = '' else: text = text.encode("UTF-8") if text or keep_singletons: subfields.append((code, text)) if subfields or keep_singletons: text = '' field_position_global += 1 record.setdefault(tag, []).append((subfields, ind1, ind2, text, field_position_global)) return record def _create_record_rxp(marcxml, verbose=CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL, correct=CFG_BIBRECORD_DEFAULT_CORRECT, 
keep_singletons=CFG_BIBRECORD_KEEP_SINGLETONS): """Create a record object using the RXP parser. If verbose>3 then the parser will be strict and will stop in case of well-formedness errors or DTD errors. If verbose=0, the parser will not give warnings. If 0 < verbose <= 3, the parser will not give errors, but will warn the user about possible mistakes correct != 0 -> We will try to correct errors such as missing attributes correct = 0 -> there will not be any attempt to correct errors """ if correct: # Note that with pyRXP < 1.13 a memory leak has been found # involving DTD parsing. So enable correction only if you have # pyRXP 1.13 or greater. marcxml = ('<?xml version="1.0" encoding="UTF-8"?>\n' '<!DOCTYPE collection SYSTEM "file://%s">\n' '<collection>\n%s\n</collection>' % (CFG_MARC21_DTD, marcxml)) # Create the pyRXP parser. + # See: http://pyrxp.readthedocs.org/en/latest/usage.html#list-of-flags pyrxp_parser = pyRXP.Parser(ErrorOnValidityErrors=0, ProcessDTD=1, ErrorOnUnquotedAttributeValues=0, srcName='string input') if verbose > 3: pyrxp_parser.ErrorOnValidityErrors = 1 pyrxp_parser.ErrorOnUnquotedAttributeValues = 1 try: root = pyrxp_parser.parse(marcxml) except pyRXP.error as ex1: raise InvenioBibRecordParserError(str(ex1)) # If record is enclosed in a collection tag, extract it. if root[TAG] == 'collection': children = _get_children_by_tag_name_rxp(root, 'record') if not children: return {} root = children[0] record = {} # This is needed because of the record_xml_output function, where we # need to know the order of the fields. field_position_global = 1 # Consider the control fields. for controlfield in _get_children_by_tag_name_rxp(root, 'controlfield'): if controlfield[CHILDREN]: value = ''.join([n for n in controlfield[CHILDREN]]) # Construct the field tuple. 
field = ([], ' ', ' ', value, field_position_global) record.setdefault(controlfield[ATTRS]['tag'], []).append(field) field_position_global += 1 elif keep_singletons: field = ([], ' ', ' ', '', field_position_global) record.setdefault(controlfield[ATTRS]['tag'], []).append(field) field_position_global += 1 # Consider the data fields. for datafield in _get_children_by_tag_name_rxp(root, 'datafield'): subfields = [] for subfield in _get_children_by_tag_name_rxp(datafield, 'subfield'): if subfield[CHILDREN]: value = _get_children_as_string_rxp(subfield[CHILDREN]) subfields.append((subfield[ATTRS].get('code', '!'), value)) elif keep_singletons: subfields.append((subfield[ATTRS].get('code', '!'), '')) if subfields or keep_singletons: # Create the field. tag = datafield[ATTRS].get('tag', '!') ind1 = datafield[ATTRS].get('ind1', '!') ind2 = datafield[ATTRS].get('ind2', '!') ind1, ind2 = _wash_indicators(ind1, ind2) # Construct the field tuple. field = (subfields, ind1, ind2, '', field_position_global) record.setdefault(tag, []).append(field) field_position_global += 1 return record def _create_record_from_document( document, keep_singletons=CFG_BIBRECORD_KEEP_SINGLETONS): """ Create a record from the document. Of type xml.dom.minidom.Document or Ft.Xml.Domlette.Document). 
""" root = None for node in document.childNodes: if node.nodeType == node.ELEMENT_NODE: root = node break if root is None: return {} if root.tagName == 'collection': children = _get_children_by_tag_name(root, 'record') if not children: return {} root = children[0] field_position_global = 1 record = {} for controlfield in _get_children_by_tag_name(root, "controlfield"): tag = controlfield.getAttributeNS(None, "tag").encode('utf-8') text_nodes = controlfield.childNodes value = ''.join([n.data for n in text_nodes]).encode("utf-8") if value or keep_singletons: field = ([], " ", " ", value, field_position_global) record.setdefault(tag, []).append(field) field_position_global += 1 for datafield in _get_children_by_tag_name(root, "datafield"): subfields = [] for subfield in _get_children_by_tag_name(datafield, "subfield"): value = _get_children_as_string(subfield.childNodes) \ .encode("utf-8") if value or keep_singletons: code = subfield.getAttributeNS(None, 'code').encode("utf-8") subfields.append((code or '!', value)) if subfields or keep_singletons: tag = datafield.getAttributeNS(None, "tag").encode("utf-8") or '!' 
ind1 = datafield.getAttributeNS(None, "ind1").encode("utf-8") ind2 = datafield.getAttributeNS(None, "ind2").encode("utf-8") ind1, ind2 = _wash_indicators(ind1, ind2) field = (subfields, ind1, ind2, "", field_position_global) record.setdefault(tag, []).append(field) field_position_global += 1 return record def _create_record_minidom(marcxml, keep_singletons=CFG_BIBRECORD_KEEP_SINGLETONS): """Create a record using minidom.""" try: dom = xml.dom.minidom.parseString(marcxml) except xml.parsers.expat.ExpatError as ex1: raise InvenioBibRecordParserError(str(ex1)) return _create_record_from_document(dom, keep_singletons=keep_singletons) def _create_record_4suite(marcxml, keep_singletons=CFG_BIBRECORD_KEEP_SINGLETONS): """Create a record using the 4suite parser.""" try: dom = Ft.Xml.Domlette.NonvalidatingReader.parseString(marcxml, "urn:dummy") except Ft.Xml.ReaderException as ex1: raise InvenioBibRecordParserError(ex1.message) return _create_record_from_document(dom, keep_singletons=keep_singletons) def _concat(alist): """Concatenate a list of lists.""" return [element for single_list in alist for element in single_list] def _subfield_xml_output(subfield): """Generate the XML for a subfield object and return it as a string.""" return ' <subfield code="%s">%s</subfield>' % \ (subfield[0], encode_for_xml(subfield[1])) def _order_by_ord(field1, field2): """Function used to order the fields according to their ord value.""" return cmp(field1[1][4], field2[1][4]) def _order_by_tags(field1, field2): """Function used to order the fields according to the tags.""" return cmp(field1[0], field2[0]) def _get_children_by_tag_name(node, name): """Retrieve all children from node 'node' with name 'name'.""" try: return [child for child in node.childNodes if child.nodeName == name] except TypeError: return [] def _get_children_by_tag_name_rxp(node, name): """Retrieve all children from 'children' with tag name 'tag'. 
children is a list returned by the RXP parser """ try: return [child for child in node[CHILDREN] if child[TAG] == name] except TypeError: return [] def _get_children_as_string(node): """Iterate through all the children of a node. Returns one string containing the values from all the text-nodes recursively. """ out = [] if node: for child in node: if child.nodeType == child.TEXT_NODE: out.append(child.data) else: out.append(_get_children_as_string(child.childNodes)) return ''.join(out) def _get_children_as_string_rxp(node): """ RXP version of _get_children_as_string(). Iterate through all the children of a node and returns one string containing the values from all the text-nodes recursively. """ out = [] if node: for child in node: if type(child) is str: out.append(child) else: out.append(_get_children_as_string_rxp(child[CHILDREN])) return ''.join(out) def _wash_indicators(*indicators): """ Wash the values of the indicators. An empty string or an underscore is replaced by a blank space. :param indicators: a series of indicators to be washed :return: a list of washed indicators """ return [indicator in ('', '_') and ' ' or indicator for indicator in indicators] def _correct_record(record): """ Check and correct the structure of the record. :param record: the record data structure :return: a list of errors found """ errors = [] for tag in record.keys(): upper_bound = '999' n = len(tag) if n > 3: i = n - 3 while i > 0: upper_bound = '%s%s' % ('0', upper_bound) i -= 1 # Missing tag. Replace it with dummy tag '000'. if tag == '!': errors.append((1, '(field number(s): ' + str([f[4] for f in record[tag]]) + ')')) record['000'] = record.pop(tag) tag = '000' elif not ('001' <= tag <= upper_bound or tag in ('FMT', 'FFT', 'BDR', 'BDM')): errors.append(2) record['000'] = record.pop(tag) tag = '000' fields = [] for field in record[tag]: # Datafield without any subfield. 
if field[0] == [] and field[3] == '': errors.append((8, '(field number: ' + str(field[4]) + ')')) subfields = [] for subfield in field[0]: if subfield[0] == '!': errors.append((3, '(field number: ' + str(field[4]) + ')')) newsub = ('', subfield[1]) else: newsub = subfield subfields.append(newsub) if field[1] == '!': errors.append((4, '(field number: ' + str(field[4]) + ')')) ind1 = " " else: ind1 = field[1] if field[2] == '!': errors.append((5, '(field number: ' + str(field[4]) + ')')) ind2 = " " else: ind2 = field[2] fields.append((subfields, ind1, ind2, field[3], field[4])) record[tag] = fields return errors def _warning(code): """ Return a warning message of code 'code'. If code = (cd, str) it returns the warning message of code 'cd' and appends str at the end """ if isinstance(code, str): return code message = '' if isinstance(code, tuple): if isinstance(code[0], str): message = code[1] code = code[0] return CFG_BIBRECORD_WARNING_MSGS.get(code, '') + message def _warnings(alist): """Apply the function _warning() to every element in alist.""" return [_warning(element) for element in alist] def _compare_lists(list1, list2, custom_cmp): """Compare twolists using given comparing function. :param list1: first list to compare :param list2: second list to compare :param custom_cmp: a function taking two arguments (element of list 1, element of list 2) and :return: True or False depending if the values are the same """ if len(list1) != len(list2): return False for element1, element2 in zip(list1, list2): if not custom_cmp(element1, element2): return False return True diff --git a/invenio/legacy/bibrecord/bibrecord_config.py b/invenio/legacy/bibrecord/bibrecord_config.py index 92ac60a15..2c4c1ad02 100644 --- a/invenio/legacy/bibrecord/bibrecord_config.py +++ b/invenio/legacy/bibrecord/bibrecord_config.py @@ -1,61 +1,61 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 CERN. 
## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. ### CONFIGURATION OPTIONS FOR BIBRECORD LIBRARY """BibRecord configuration file. This file sets a list of errors that can be generated by BibRecord, the default behaviours for the parsers used and the parsers available.""" import pkg_resources # location of the MARC21 DTD file: CFG_MARC21_DTD = pkg_resources.resource_filename('invenio.legacy.bibrecord.data', 'MARC21slim.dtd') # internal dictionary of warning messages: CFG_BIBRECORD_WARNING_MSGS = { 0: "", 1: "WARNING: tag missing for field(s)\nValue stored with tag '000'", 2: "WARNING: bad range for tags (tag must be in range 001-999)\nValue stored with tag '000'", 3: "WARNING: Missing atribute 'code' for subfield\nValue stored with code ''", 4: "WARNING: Missing attribute 'ind1'\nValue stored with ind1 = ''", 5: "WARNING: Missing attribute 'ind2'\nValue stored with ind2 = ''", 6: "Import Error", 7: "WARNING: value expected of type string.", 8: "WARNING: empty datafield", 98: "WARNING: problems importing invenio", 99: "Document not well formed", } # verbose level to be used when creating records from XML: (0=least, ..., 9=most) CFG_BIBRECORD_DEFAULT_VERBOSE_LEVEL = 0 # correction level to be used when creating records from XML: (0=no, 1=yes) CFG_BIBRECORD_DEFAULT_CORRECT = 0 # XML parsers available: 
-CFG_BIBRECORD_PARSERS_AVAILABLE = ['pyrxp', 'lxml', '4suite', 'minidom']
+CFG_BIBRECORD_PARSERS_AVAILABLE = ['lxml', 'pyrxp', '4suite', 'minidom']

# Exceptions
# Both classes derive directly from Exception and add no behaviour of
# their own; they exist so that callers can catch BibRecord problems by type.

class InvenioBibRecordParserError(Exception):
    """A generic parsing exception for all available parsers."""
    pass

class InvenioBibRecordFieldError(Exception):
    """A generic error for BibRecord."""
    pass

diff --git a/invenio/modules/records/testsuite/test_legacy_record.py b/invenio/modules/records/testsuite/test_legacy_record.py index e4cd8cb4c..de8653c37 100644 --- a/invenio/modules/records/testsuite/test_legacy_record.py +++ b/invenio/modules/records/testsuite/test_legacy_record.py @@ -1,1803 +1,1803 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ The BibRecord test suite.
""" import os import pkg_resources from invenio.base.wrappers import lazy_import from invenio.testsuite import make_test_suite, run_test_suite, InvenioTestCase bibrecord = lazy_import('invenio.legacy.bibrecord') bibrecord_config = lazy_import('invenio.legacy.bibrecord.bibrecord_config') -try: - import pyRXP - parser_pyrxp_available = True -except ImportError: - parser_pyrxp_available = False - try: from lxml import etree parser_lxml_available = True except ImportError: parser_lxml_available = False +try: + import pyRXP + parser_pyrxp_available = True +except ImportError: + parser_pyrxp_available = False + try: import Ft.Xml.Domlette parser_4suite_available = True except ImportError: parser_4suite_available = False try: import xml.dom.minidom import xml.parsers.expat parser_minidom_available = True except ImportError: parser_minidom_available = False class BibRecordSuccessTest(InvenioTestCase): """ bibrecord - demo file parsing test """ def setUp(self): """Initialize stuff""" xmltext = pkg_resources.resource_string('invenio.testsuite', os.path.join('data', 'demo_record_marc_data.xml')) self.recs = [rec[0] for rec in bibrecord.create_records(xmltext)] def test_records_created(self): """ bibrecord - demo file how many records are created """ self.assertEqual(141, len(self.recs)) def test_tags_created(self): """ bibrecord - demo file which tags are created """ ## check if the tags are correct tags = ['003', '005', '020', '024', '035', '037', '041', '080', '084', '088', '100', '110', '148', '150', '242', '245', '246', '250', '260', '269', '270', '300', '340', '371', '372', '400', '410', '430', '440', '450', '490', '500', '502', '506', '510', '520', '542', '550', '588', '590', '595', '643', '650', '653', '670', '678', '680', '690', '691', '693', '694', '695', '697', '700', '710', '711', '720', '773', '852', '856', '859', '901', '909', '913', '914', '916', '920', '960', '961', '962', '963', '964', '970', '980', '999', 'FFT'] t = [] for rec in self.recs: 
t.extend(rec.keys()) t.sort() #eliminate the elements repeated tt = [] for x in t: if not x in tt: tt.append(x) self.assertEqual(tags, tt) def test_fields_created(self): """bibrecord - demo file how many fields are created""" ## check if the number of fields for each record is correct fields = [14, 14, 8, 11, 11, 13, 11, 15, 10, 18, 15, 16, 10, 9, 15, 10, 11, 11, 11, 9, 11, 11, 10, 9, 9, 9, 10, 9, 10, 10, 8, 9, 8, 9, 14, 13, 14, 14, 15, 12, 13, 12, 15, 15, 13, 16, 16, 15, 15, 14, 16, 15, 15, 15, 16, 15, 16, 15, 15, 16, 15, 15, 14, 15, 12, 13, 11, 15, 8, 11, 14, 13, 12, 13, 6, 6, 25, 24, 27, 26, 26, 24, 26, 26, 25, 28, 24, 23, 27, 25, 25, 26, 26, 25, 20, 26, 25, 22, 9, 8, 9, 9, 8, 7, 19, 21, 27, 23, 23, 22, 9, 8, 16, 7, 7, 9, 5, 5, 3, 9, 12, 6, 8, 8, 8, 13, 20, 20, 5, 8, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7, 8] cr = [] ret = [] for rec in self.recs: cr.append(len(rec.values())) ret.append(rec) self.assertEqual(fields, cr, "\n%s\n!=\n%s" % (fields, cr)) def test_create_record_with_collection_tag(self): """ bibrecord - create_record() for single record in collection""" xmltext = """ <collection> <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> </record> </collection> """ record = bibrecord.create_record(xmltext) record1 = bibrecord.create_records(xmltext)[0] self.assertEqual(record1, record) class BibRecordParsersTest(InvenioTestCase): """ bibrecord - testing the creation of records with different parsers""" def setUp(self): """Initialize stuff""" self.xmltext = """ <!-- A first comment --> <collection> <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <!-- A second comment --> <subfield code="a">eng</subfield> </datafield> </record> </collection> """ self.expected_record = { '001': [([], ' ', ' ', '33', 1)], '041': [([('a', 'eng')], ' ', ' ', '', 2)] } - if parser_pyrxp_available: - def test_pyRXP(self): - """ bibrecord - create_record() with 
pyRXP """ - record = bibrecord._create_record_rxp(self.xmltext) - self.assertEqual(record, self.expected_record) - if parser_lxml_available: def test_lxml(self): """ bibrecord - create_record() with lxml""" record = bibrecord._create_record_lxml(self.xmltext) self.assertEqual(record, self.expected_record) + if parser_pyrxp_available: + def test_pyRXP(self): + """ bibrecord - create_record() with pyRXP """ + record = bibrecord._create_record_rxp(self.xmltext) + self.assertEqual(record, self.expected_record) + if parser_4suite_available: def test_4suite(self): """ bibrecord - create_record() with 4suite """ record = bibrecord._create_record_4suite(self.xmltext) self.assertEqual(record, self.expected_record) if parser_minidom_available: def test_minidom(self): """ bibrecord - create_record() with minidom """ record = bibrecord._create_record_minidom(self.xmltext) self.assertEqual(record, self.expected_record) class BibRecordDropDuplicateFieldsTest(InvenioTestCase): def test_drop_duplicate_fields(self): """bibrecord - testing record_drop_duplicate_fields()""" record = """ <record> <controlfield tag="001">123</controlfield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> <subfield code="u">Foo University</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> <subfield code="u">Foo University</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="u">Foo University</subfield> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> <subfield code="u">Foo University</subfield> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield cde="a">On the foo and bar</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> <subfield code="u">Foo University</subfield> </datafield> </record> """ record_result = 
""" <record> <controlfield tag="001">123</controlfield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> <subfield code="u">Foo University</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="u">Foo University</subfield> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> <subfield code="u">Foo University</subfield> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield cde="a">On the foo and bar</subfield> </datafield> </record> """ rec = bibrecord.create_record(record)[0] rec = bibrecord.record_drop_duplicate_fields(rec) rec2 = bibrecord.create_record(record_result)[0] self.maxDiff = None self.assertEqual(rec, rec2) class BibRecordBadInputTreatmentTest(InvenioTestCase): """ bibrecord - testing for bad input treatment """ def test_empty_collection(self): """bibrecord - empty collection""" xml_error0 = """<collection></collection>""" rec = bibrecord.create_record(xml_error0)[0] self.assertEqual(rec, {}) records = bibrecord.create_records(xml_error0) self.assertEqual(len(records), 0) def test_wrong_attribute(self): """bibrecord - bad input subfield \'cde\' instead of \'code\'""" ws = bibrecord.CFG_BIBRECORD_WARNING_MSGS xml_error1 = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield cde="a">On the foo and bar</subfield> </datafield> </record> """ e = bibrecord.create_record(xml_error1, 1, 1)[2] ee ='' for i in e: if type(i).__name__ == 'str': if i.count(ws[3])>0: ee = i self.assertEqual(bibrecord._warning((3, '(field number: 4)')), ee) def test_missing_attribute(self): """ bibrecord - bad input missing \"tag\" """ ws = 
bibrecord.CFG_BIBRECORD_WARNING_MSGS xml_error2 = """ <record> <controlfield tag="001">33</controlfield> <datafield ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">On the foo and bar</subfield> </datafield> </record> """ e = bibrecord.create_record(xml_error2, 1, 1)[2] ee = '' for i in e: if type(i).__name__ == 'str': if i.count(ws[1])>0: ee = i self.assertEqual(bibrecord._warning((1, '(field number(s): [2])')), ee) def test_empty_datafield(self): """ bibrecord - bad input no subfield """ ws = bibrecord.CFG_BIBRECORD_WARNING_MSGS xml_error3 = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">On the foo and bar</subfield> </datafield> </record> """ e = bibrecord.create_record(xml_error3, 1, 1)[2] ee = '' for i in e: if type(i).__name__ == 'str': if i.count(ws[8])>0: ee = i self.assertEqual(bibrecord._warning((8, '(field number: 2)')), ee) def test_missing_tag(self): """bibrecord - bad input missing end \"tag\" """ ws = bibrecord.CFG_BIBRECORD_WARNING_MSGS xml_error4 = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">On the foo and bar</subfield> </record> """ e = bibrecord.create_record(xml_error4, 1, 1)[2] ee = '' for i in e: if type(i).__name__ == 'str': if i.count(ws[99])>0: ee = i self.assertEqual(bibrecord._warning((99, '(Tagname : datafield)')), ee) class BibRecordAccentedUnicodeLettersTest(InvenioTestCase): """ bibrecord - testing accented UTF-8 
letters """ def setUp(self): """Initialize stuff""" self.xml_example_record = """<record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Döè1, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, J>ohn</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">Пушкин</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record>""" self.rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] def test_accented_unicode_characters(self): """bibrecord - accented Unicode letters""" self.assertEqual(self.xml_example_record, bibrecord.record_xml_output(self.rec)) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "100", " ", " "), [([('a', 'Döè1, John')], " ", " ", "", 3), ([('a', 'Doe2, J>ohn'), ('b', 'editor')], " ", " ", "", 4)]) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "245", " ", "1"), [([('a', 'Пушкин')], " ", '1', "", 5)]) class BibRecordGettingFieldValuesTest(InvenioTestCase): """ bibrecord - testing for getting field/subfield values """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe1, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> <datafield tag="700" ind1=" " ind2="2"> <subfield 
code="a">Penrose, Roger</subfield> <subfield code="u">University College London</subfield> </datafield> <datafield tag="700" ind1=" " ind2="2"> <subfield code="a">Messi, Lionel</subfield> <subfield code="u">FC Barcelona</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_get_field_instances(self): """bibrecord - getting field instances""" self.assertEqual(bibrecord.record_get_field_instances(self.rec, "100", " ", " "), [([('a', 'Doe1, John')], " ", " ", "", 3), ([('a', 'Doe2, John'), ('b', 'editor')], " ", " ", "", 4)]) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "", " ", " "), [('245', [([('a', 'On the foo and bar1')], " ", '1', "", 5), ([('a', 'On the foo and bar2')], " ", '2', "", 6)]), ('001', [([], " ", " ", '33', 1)]), ('700', [([('a', 'Penrose, Roger'), ('u', "University College London")], ' ', '2', '', 7), ([('a', 'Messi, Lionel'), ('u', 'FC Barcelona')], ' ', '2', '', 8)]), ('100', [([('a', 'Doe1, John')], " ", " ", "", 3), ([('a', 'Doe2, John'), ('b', 'editor')], " ", " ", "", 4)]), ('041', [([('a', 'eng')], " ", " ", "", 2)]),]) def test_get_field_values(self): """bibrecord - getting field values""" self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "a"), ['Doe1, John', 'Doe2, John']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "b"), ['editor']) def test_get_field_value(self): """bibrecord - getting first field value""" self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", " ", " ", "a"), 'Doe1, John') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", " ", " ", "b"), 'editor') def test_get_subfield_values(self): """bibrecord - getting subfield values""" fi1, fi2 = bibrecord.record_get_field_instances(self.rec, "100", " ", " ") self.assertEqual(bibrecord.field_get_subfield_values(fi1, "b"), []) self.assertEqual(bibrecord.field_get_subfield_values(fi2, "b"), ["editor"]) def 
test_filter_field(self): """bibrecord - filter field instances""" field_instances = bibrecord.record_get_field_instances(self.rec, "700", "%", "%") out = bibrecord.filter_field_instances(field_instances, "u", "University College London", 'e') self.assertEqual(out, [([('a', 'Penrose, Roger'), ('u', "University College London")], ' ', '2', '', 7)]) out = bibrecord.filter_field_instances(field_instances, "u", "Bar", "s") self.assertEqual(out, [([('a', 'Messi, Lionel'), ('u', 'FC Barcelona')], ' ', '2', '', 8)]) out = bibrecord.filter_field_instances(field_instances, "u", "on", "s") self.assertEqual(out, [([('a', 'Penrose, Roger'), ('u', "University College London")], ' ', '2', '', 7), ([('a', 'Messi, Lionel'), ('u', 'FC Barcelona')], ' ', '2', '', 8)]) out = bibrecord.filter_field_instances(field_instances, "u", r".*\scoll", "r") self.assertEqual(out,[]) out = bibrecord.filter_field_instances(field_instances, "u", r"[FC]{2}\s.*", "r") self.assertEqual(out, [([('a', 'Messi, Lionel'), ('u', 'FC Barcelona')], ' ', '2', '', 8)]) class BibRecordGettingFieldValuesViaWildcardsTest(InvenioTestCase): """ bibrecord - testing for getting field/subfield values via wildcards """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">1</controlfield> <datafield tag="100" ind1="C" ind2="5"> <subfield code="a">val1</subfield> </datafield> <datafield tag="555" ind1="A" ind2="B"> <subfield code="a">val2</subfield> </datafield> <datafield tag="555" ind1="A" ind2=" "> <subfield code="a">val3</subfield> </datafield> <datafield tag="555" ind1=" " ind2=" "> <subfield code="a">val4a</subfield> <subfield code="b">val4b</subfield> </datafield> <datafield tag="555" ind1=" " ind2="B"> <subfield code="a">val5</subfield> </datafield> <datafield tag="556" ind1="A" ind2="C"> <subfield code="a">val6</subfield> </datafield> <datafield tag="556" ind1="A" ind2=" "> <subfield code="a">val7a</subfield> <subfield code="b">val7b</subfield> </datafield> </record> 
""" self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_get_field_instances_via_wildcard(self): """bibrecord - getting field instances via wildcards""" self.assertEqual(bibrecord.record_get_field_instances(self.rec, "100", " ", " "), []) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "100", "%", " "), []) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "100", "%", "%"), [([('a', 'val1')], 'C', '5', "", 2)]) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "55%", "A", "%"), [([('a', 'val2')], 'A', 'B', "", 3), ([('a', 'val3')], 'A', " ", "", 4), ([('a', 'val6')], 'A', 'C', "", 7), ([('a', 'val7a'), ('b', 'val7b')], 'A', " ", "", 8)]) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "55%", "A", " "), [([('a', 'val3')], 'A', " ", "", 4), ([('a', 'val7a'), ('b', 'val7b')], 'A', " ", "", 8)]) self.assertEqual(bibrecord.record_get_field_instances(self.rec, "556", "A", " "), [([('a', 'val7a'), ('b', 'val7b')], 'A', " ", "", 8)]) def test_get_field_values_via_wildcard(self): """bibrecord - getting field values via wildcards""" self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", " "), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", "%", " ", " "), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", "%", " "), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", "%", "%", " "), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", "%", "%", "z"), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "%"), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "a"), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", "%", " ", "a"), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", "%", "%", "a"), ['val1']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", "%", "%", 
"%"), ['val1']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "A", "%", "a"), ['val2', 'val3', 'val6', 'val7a']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "A", " ", "a"), ['val3', 'val7a']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "556", "A", " ", "a"), ['val7a']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "555", " ", " ", " "), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "555", " ", " ", "z"), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "555", " ", " ", "%"), ['val4a', 'val4b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", " ", " ", "b"), ['val4b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", "b"), ['val4b', 'val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "A", " ", "b"), ['val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "A", "%", "b"), ['val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "A", " ", "a"), ['val3', 'val7a']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "A", "%", "a"), ['val2', 'val3', 'val6', 'val7a']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", "a"), ['val2', 'val3', 'val4a', 'val5', 'val6', 'val7a']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", " ", " ", "a"), ['val4a']) def test_get_field_values_filtering_exact(self): """bibrecord - getting field values and exact filtering""" self.assertEqual(bibrecord.record_get_field_values(self.rec, "556", "%", "%", "%", 'a', 'val7a'), ['val7a', 'val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "556", "%", "%", "a", 'a', 'val7a'), ['val7a']) def test_get_field_values_filtering_substring(self): """bibrecord - getting field values and substring filtering""" self.assertEqual(bibrecord.record_get_field_values(self.rec, "556", "%", "%", "%", 'a', '7a', 
's'), ['val7a', 'val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "556", "%", "%", "b", 'a', '7a', 's'), ['val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", "%", 'b', 'val', 's'), ['val4a', 'val4b', 'val7a', 'val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", " ", 'b', 'val', 's'), []) def test_get_field_values_filtering_regexp(self): """bibrecord - getting field values and regexp filtering""" self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", "%", 'b', r'al', 'r'), []) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", "%", 'a', r'.*al[6,7]', 'r'), ['val6', 'val7a', 'val7b']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "55%", "%", "%", "a", 'a', r'.*al[6,7]', 'r'), ['val6', 'val7a']) def test_get_field_value_via_wildcard(self): """bibrecord - getting first field value via wildcards""" self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", " ", " ", " "), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", "%", " ", " "), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", " ", "%", " "), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", "%", "%", " "), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", " ", " ", "%"), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", " ", " ", "a"), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", "%", " ", "a"), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", "%", "%", "a"), 'val1') self.assertEqual(bibrecord.record_get_field_value(self.rec, "100", "%", "%", "%"), 'val1') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "A", "%", "a"), 'val2') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "A", " ", "a"), 'val3') self.assertEqual(bibrecord.record_get_field_value(self.rec, 
"556", "A", " ", "a"), 'val7a') self.assertEqual(bibrecord.record_get_field_value(self.rec, "555", " ", " ", " "), '') self.assertEqual(bibrecord.record_get_field_value(self.rec, "555", " ", " ", "%"), 'val4a') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", " ", " ", "b"), 'val4b') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "%", "%", "b"), 'val4b') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "A", " ", "b"), 'val7b') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "A", "%", "b"), 'val7b') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "A", " ", "a"), 'val3') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "A", "%", "a"), 'val2') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", "%", "%", "a"), 'val2') self.assertEqual(bibrecord.record_get_field_value(self.rec, "55%", " ", " ", "a"), 'val4a') class BibRecordAddFieldTest(InvenioTestCase): """ bibrecord - testing adding field """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe1, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_add_controlfield(self): """bibrecord - adding controlfield""" field_position_global_1 = bibrecord.record_add_field(self.rec, "003", controlfield_value="SzGeCERN") field_position_global_2 = bibrecord.record_add_field(self.rec, "004", 
controlfield_value="Test") self.assertEqual(field_position_global_1, 2) self.assertEqual(field_position_global_2, 3) self.assertEqual(bibrecord.record_get_field_values(self.rec, "003", " ", " ", ""), ['SzGeCERN']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "004", " ", " ", ""), ['Test']) def test_add_datafield(self): """bibrecord - adding datafield""" field_position_global_1 = bibrecord.record_add_field(self.rec, "100", subfields=[('a', 'Doe3, John')]) field_position_global_2 = bibrecord.record_add_field(self.rec, "100", subfields= [('a', 'Doe4, John'), ('b', 'editor')]) self.assertEqual(field_position_global_1, 5) self.assertEqual(field_position_global_2, 6) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "a"), ['Doe1, John', 'Doe2, John', 'Doe3, John', 'Doe4, John']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "b"), ['editor', 'editor']) def test_add_controlfield_on_desired_position(self): """bibrecord - adding controlfield on desired position""" field_position_global_1 = bibrecord.record_add_field(self.rec, "005", controlfield_value="Foo", field_position_global=0) field_position_global_2 = bibrecord.record_add_field(self.rec, "006", controlfield_value="Bar", field_position_global=0) self.assertEqual(field_position_global_1, 7) self.assertEqual(field_position_global_2, 8) def test_add_datafield_on_desired_position_field_position_global(self): """bibrecord - adding datafield on desired global field position""" field_position_global_1 = bibrecord.record_add_field(self.rec, "100", subfields=[('a', 'Doe3, John')], field_position_global=0) field_position_global_2 = bibrecord.record_add_field(self.rec, "100", subfields=[('a', 'Doe4, John'), ('b', 'editor')], field_position_global=0) self.assertEqual(field_position_global_1, 3) self.assertEqual(field_position_global_2, 3) def test_add_datafield_on_desired_position_field_position_local(self): """bibrecord - adding datafield on desired local 
field position""" field_position_global_1 = bibrecord.record_add_field(self.rec, "100", subfields=[('a', 'Doe3, John')], field_position_local=0) field_position_global_2 = bibrecord.record_add_field(self.rec, "100", subfields=[('a', 'Doe4, John'), ('b', 'editor')], field_position_local=2) self.assertEqual(field_position_global_1, 3) self.assertEqual(field_position_global_2, 5) class BibRecordManageMultipleFieldsTest(InvenioTestCase): """ bibrecord - testing the management of multiple fields """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">subfield1</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">subfield2</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">subfield3</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">subfield4</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_delete_multiple_datafields(self): """bibrecord - deleting multiple datafields""" self.fields = bibrecord.record_delete_fields(self.rec, '245', [1, 2]) self.assertEqual(self.fields[0], ([('a', 'subfield2')], ' ', ' ', '', 3)) self.assertEqual(self.fields[1], ([('a', 'subfield3')], ' ', ' ', '', 4)) def test_add_multiple_datafields_default_index(self): """bibrecord - adding multiple fields with the default index""" fields = [([('a', 'subfield5')], ' ', ' ', '', 4), ([('a', 'subfield6')], ' ', ' ', '', 19)] index = bibrecord.record_add_fields(self.rec, '245', fields) self.assertEqual(index, None) self.assertEqual(self.rec['245'][-2], ([('a', 'subfield5')], ' ', ' ', '', 6)) self.assertEqual(self.rec['245'][-1], ([('a', 'subfield6')], ' ', ' ', '', 7)) def test_add_multiple_datafields_with_index(self): """bibrecord - adding multiple fields with an index""" fields = [([('a', 'subfield5')], ' ', ' ', '', 4), ([('a', 
'subfield6')], ' ', ' ', '', 19)] index = bibrecord.record_add_fields(self.rec, '245', fields, field_position_local=0) self.assertEqual(index, 0) self.assertEqual(self.rec['245'][0], ([('a', 'subfield5')], ' ', ' ', '', 2)) self.assertEqual(self.rec['245'][1], ([('a', 'subfield6')], ' ', ' ', '', 3)) self.assertEqual(self.rec['245'][2], ([('a', 'subfield1')], ' ', ' ', '', 4)) def test_move_multiple_fields(self): """bibrecord - move multiple fields""" bibrecord.record_move_fields(self.rec, '245', [1, 3]) self.assertEqual(self.rec['245'][0], ([('a', 'subfield1')], ' ', ' ', '', 2)) self.assertEqual(self.rec['245'][1], ([('a', 'subfield3')], ' ', ' ', '', 4)) self.assertEqual(self.rec['245'][2], ([('a', 'subfield2')], ' ', ' ', '', 5)) self.assertEqual(self.rec['245'][3], ([('a', 'subfield4')], ' ', ' ', '', 6)) class BibRecordDeleteFieldTest(InvenioTestCase): """ bibrecord - testing field deletion """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe1, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] xml_example_record_empty = """ <record> </record> """ self.rec_empty = bibrecord.create_record(xml_example_record_empty, 1, 1)[0] def test_delete_controlfield(self): """bibrecord - deleting controlfield""" bibrecord.record_delete_field(self.rec, "001", " ", " ") self.assertEqual(bibrecord.record_get_field_values(self.rec, "001", " ", " ", " "), []) 
self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "b"), ['editor']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "2", "a"), ['On the foo and bar2']) def test_delete_datafield(self): """bibrecord - deleting datafield""" bibrecord.record_delete_field(self.rec, "100", " ", " ") self.assertEqual(bibrecord.record_get_field_values(self.rec, "001", " ", " ", ""), ['33']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "b"), []) bibrecord.record_delete_field(self.rec, "245", " ", " ") self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "1", "a"), ['On the foo and bar1']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "2", "a"), ['On the foo and bar2']) bibrecord.record_delete_field(self.rec, "245", " ", "2") self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "1", "a"), ['On the foo and bar1']) self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "2", "a"), []) def test_add_delete_add_field_to_empty_record(self): """bibrecord - adding, deleting, and adding back a field to an empty record""" field_position_global_1 = bibrecord.record_add_field(self.rec_empty, "003", controlfield_value="SzGeCERN") self.assertEqual(field_position_global_1, 1) self.assertEqual(bibrecord.record_get_field_values(self.rec_empty, "003", " ", " ", ""), ['SzGeCERN']) bibrecord.record_delete_field(self.rec_empty, "003", " ", " ") self.assertEqual(bibrecord.record_get_field_values(self.rec_empty, "003", " ", " ", ""), []) field_position_global_1 = bibrecord.record_add_field(self.rec_empty, "003", controlfield_value="SzGeCERN2") self.assertEqual(field_position_global_1, 1) self.assertEqual(bibrecord.record_get_field_values(self.rec_empty, "003", " ", " ", ""), ['SzGeCERN2']) class BibRecordDeleteFieldFromTest(InvenioTestCase): """ bibrecord - testing field deletion from position""" def setUp(self): """Initialize stuff""" 
xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe1, John</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_delete_field_from(self): """bibrecord - deleting field from position""" bibrecord.record_delete_field(self.rec, "100", field_position_global=4) self.assertEqual(self.rec['100'], [([('a', 'Doe1, John')], ' ', ' ', '', 3)]) bibrecord.record_delete_field(self.rec, "100", field_position_global=3) self.failIf('100' in self.rec) bibrecord.record_delete_field(self.rec, "001", field_position_global=1) bibrecord.record_delete_field(self.rec, "245", field_position_global=6) self.failIf('001' in self.rec) self.assertEqual(self.rec['245'], [([('a', 'On the foo and bar1')], ' ', '1', '', 5)]) # Some crash tests bibrecord.record_delete_field(self.rec, '999', field_position_global=1) bibrecord.record_delete_field(self.rec, '245', field_position_global=999) class BibRecordAddSubfieldIntoTest(InvenioTestCase): """ bibrecord - testing subfield addition """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " 
ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_add_subfield_into(self): """bibrecord - adding subfield into position""" bibrecord.record_add_subfield_into(self.rec, "100", "b", "Samekniv", field_position_global=3) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "b"), ['editor', 'Samekniv']) bibrecord.record_add_subfield_into(self.rec, "245", "x", "Elgokse", field_position_global=4) bibrecord.record_add_subfield_into(self.rec, "245", "x", "Fiskeflue", subfield_position=0, field_position_global=4) bibrecord.record_add_subfield_into(self.rec, "245", "z", "Ulriken", subfield_position=2, field_position_global=4) bibrecord.record_add_subfield_into(self.rec, "245", "z", "Stortinget", subfield_position=999, field_position_global=4) self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "1", "%"), ['Fiskeflue', 'On the foo and bar1', 'Ulriken', 'Elgokse', 'Stortinget']) # Some crash tests self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_add_subfield_into, self.rec, "187", "x", "Crash", field_position_global=1) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_add_subfield_into, self.rec, "245", "x", "Crash", field_position_global=999) class BibRecordModifyControlfieldTest(InvenioTestCase): """ bibrecord - testing controlfield modification """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <controlfield tag="005">A Foo's Tale</controlfield> <controlfield tag="008">Skeech Skeech</controlfield> <controlfield tag="008">Whoop Whoop</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 
1)[0] def test_modify_controlfield(self): """bibrecord - modify controlfield""" bibrecord.record_modify_controlfield(self.rec, "001", "34", field_position_global=1) bibrecord.record_modify_controlfield(self.rec, "008", "Foo Foo", field_position_global=3) self.assertEqual(bibrecord.record_get_field_values(self.rec, "001"), ["34"]) self.assertEqual(bibrecord.record_get_field_values(self.rec, "005"), ["A Foo's Tale"]) self.assertEqual(bibrecord.record_get_field_values(self.rec, "008"), ["Foo Foo", "Whoop Whoop"]) # Some crash tests self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_modify_controlfield, self.rec, "187", "Crash", field_position_global=1) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_modify_controlfield, self.rec, "008", "Test", field_position_global=10) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_modify_controlfield, self.rec, "245", "Burn", field_position_global=5) self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "2", "%"), ["On the foo and bar2"]) class BibRecordModifySubfieldTest(InvenioTestCase): """ bibrecord - testing subfield modification """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> <subfield code="b">On writing unit tests</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_modify_subfield(self): """bibrecord - modify subfield""" bibrecord.record_modify_subfield(self.rec, "245", "a", "Holmenkollen", 0, 
field_position_global=4) bibrecord.record_modify_subfield(self.rec, "245", "x", "Brann", 1, field_position_global=4) self.assertEqual(bibrecord.record_get_field_values(self.rec, "245", " ", "1", "%"), ['Holmenkollen', 'Brann']) # Some crash tests self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_modify_subfield, self.rec, "187", "x", "Crash", 0, field_position_global=1) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_modify_subfield, self.rec, "245", "x", "Burn", 1, field_position_global=999) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_modify_subfield, self.rec, "245", "a", "Burn", 999, field_position_global=4) class BibRecordDeleteSubfieldFromTest(InvenioTestCase): """ bibrecord - testing subfield deletion """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> <subfield code="z">Skal vi danse?</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_delete_subfield_from(self): """bibrecord - delete subfield from position""" bibrecord.record_delete_subfield_from(self.rec, "100", 2, field_position_global=3) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "z"), []) bibrecord.record_delete_subfield_from(self.rec, "100", 0, field_position_global=3) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "%"), ['editor']) bibrecord.record_delete_subfield_from(self.rec, "100", 0, field_position_global=3) 
self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "%"), []) # Some crash tests self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_delete_subfield_from, self.rec, "187", 0, field_position_global=1) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_delete_subfield_from, self.rec, "245", 0, field_position_global=999) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_delete_subfield_from, self.rec, "245", 999, field_position_global=4) class BibRecordDeleteSubfieldTest(InvenioTestCase): """ bibrecord - testing subfield deletion """ def setUp(self): """Initialize stuff""" self.xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> <subfield code="z">Skal vi danse?</subfield> <subfield code="a">Doe3, Zbigniew</subfield> <subfield code="d">Doe4, Joachim</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> <datafield tag="245" ind1=" " ind2="2"> <subfield code="a">On the foo and bar2</subfield> </datafield> <datafield tag="246" ind1="1" ind2="2"> <subfield code="c">On the foo and bar1</subfield> </datafield> <datafield tag="246" ind1="1" ind2="2"> <subfield code="c">On the foo and bar2</subfield> </datafield> </record> """ def test_simple_removals(self): """ bibrecord - delete subfield by its code""" # testing a simple removals where all the fields are removed rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] bibrecord.record_delete_subfield(rec, "041", "b") # nothing should change self.assertEqual(rec["041"][0][0], [("a", "eng")]) bibrecord.record_delete_subfield(rec, "041", "a") self.assertEqual(rec["041"][0][0], []) def test_indices_important(self): """ bibrecord - 
delete subfield where indices are important""" rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] bibrecord.record_delete_subfield(rec, "245", "a", " ", "1") self.assertEqual(rec["245"][0][0], []) self.assertEqual(rec["245"][1][0], [("a", "On the foo and bar2")]) bibrecord.record_delete_subfield(rec, "245", "a", " ", "2") self.assertEqual(rec["245"][1][0], []) def test_remove_some(self): """ bibrecord - delete subfield when some should be preserved and some removed""" rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] bibrecord.record_delete_subfield(rec, "100", "a", " ", " ") self.assertEqual(rec["100"][0][0], [("b", "editor"), ("z", "Skal vi danse?"), ("d", "Doe4, Joachim")]) def test_more_fields(self): """ bibrecord - delete subfield where more fits criteria""" rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] bibrecord.record_delete_subfield(rec, "246", "c", "1", "2") self.assertEqual(rec["246"][1][0], []) self.assertEqual(rec["246"][0][0], []) def test_nonexisting_removals(self): """ bibrecord - delete subfield that does not exist """ rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] # further preparation bibrecord.record_delete_subfield(rec, "100", "a", " ", " ") self.assertEqual(rec["100"][0][0], [("b", "editor"), ("z", "Skal vi danse?"), ("d", "Doe4, Joachim")]) #the real tests begin # 1) removing the subfield from an empty list of subfields bibrecord.record_delete_subfield(rec, "246", "c", "1", "2") self.assertEqual(rec["246"][1][0], []) self.assertEqual(rec["246"][0][0], []) bibrecord.record_delete_subfield(rec, "246", "8", "1", "2") self.assertEqual(rec["246"][1][0], []) self.assertEqual(rec["246"][0][0], []) # 2) removing a subfield from a field that has some subfields but none has an appropriate code bibrecord.record_delete_subfield(rec, "100", "a", " ", " ") self.assertEqual(rec["100"][0][0], [("b", "editor"), ("z", "Skal vi danse?"), ("d", "Doe4, Joachim")]) bibrecord.record_delete_subfield(rec, 
"100", "e", " ", " ") self.assertEqual(rec["100"][0][0], [("b", "editor"), ("z", "Skal vi danse?"), ("d", "Doe4, Joachim")]) class BibRecordMoveSubfieldTest(InvenioTestCase): """ bibrecord - testing subfield moving """ def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> <subfield code="c">fisk</subfield> <subfield code="d">eple</subfield> <subfield code="e">hammer</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_move_subfield(self): """bibrecord - move subfields""" bibrecord.record_move_subfield(self.rec, "100", 2, 4, field_position_global=3) bibrecord.record_move_subfield(self.rec, "100", 1, 0, field_position_global=3) bibrecord.record_move_subfield(self.rec, "100", 2, 999, field_position_global=3) self.assertEqual(bibrecord.record_get_field_values(self.rec, "100", " ", " ", "%"), ['editor', 'Doe2, John', 'hammer', 'fisk', 'eple']) # Some crash tests self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_move_subfield, self.rec, "187", 0, 1, field_position_global=3) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_move_subfield, self.rec, "100", 1, 0, field_position_global=999) self.assertRaises(bibrecord.InvenioBibRecordFieldError, bibrecord.record_move_subfield, self.rec, "100", 999, 0, field_position_global=3) class BibRecordCompareSubfieldTest(InvenioTestCase): """ bibrecord - """ - + def setUp(self): """Initialize stuff""" xml_example_record = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> 
<datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Doe2, John</subfield> <subfield code="b">editor</subfield> <subfield code="c">fisk</subfield> <subfield code="d">eple</subfield> <subfield code="e">hammer</subfield> </datafield> <datafield tag="245" ind1=" " ind2="1"> <subfield code="a">On the foo and bar1</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] # For simplicity, create an alias of the function self._func = bibrecord.record_match_subfields def test_check_subfield_exists(self): self.assertEqual(self._func(self.rec, '100', sub_key='a'), 3) self.assertEqual(self._func(self.rec, '100', sub_key='e'), 3) self.assertFalse(self._func(self.rec, '245', sub_key='a')) self.assertEqual(self._func(self.rec, '245', ind2='1', sub_key='a'), 4) self.assertFalse(self._func(self.rec, '999', sub_key='x')) self.assertFalse(self._func(self.rec, '100', sub_key='x')) def test_check_subfield_values(self): self.assertEqual(self._func(self.rec, '100', sub_key='b', sub_value='editor'), 3) self.assertEqual(self._func(self.rec, '245', ind2='1', sub_key='a', sub_value='On the foo and bar1'), 4) self.assertEqual(self._func(self.rec, '100', sub_key='e', sub_value='ponies suck'), False) self.assertEqual(self._func(self.rec, '100', sub_key='c', sub_value='FISK'), False) self.assertEqual(self._func(self.rec, '100', sub_key='c', sub_value='FISK', case_sensitive=False), 3) def test_compare_subfields(self): self.assertEqual(self._func(self.rec, '100', sub_key='c', sub_value='fisk', sub_key2='d', sub_value2='eple'), 3) self.assertFalse(self._func(self.rec, '100', sub_key='c', sub_value='fisk', sub_key2='d', sub_value2='tom')) self.assertEqual(self._func(self.rec, '100', sub_key='c', sub_value='fiSk', sub_key2='d', sub_value2='Eple', case_sensitive=False), 3) def test_error_conditions(self): self.assertRaises(TypeError, self._func, self.rec, '100') self.assertRaises(TypeError, self._func, self.rec, '100', sub_key='a', sub_value='fiSk', 
sub_key2='d') class BibRecordSpecialTagParsingTest(InvenioTestCase): """ bibrecord - parsing special tags (FMT, FFT)""" def setUp(self): """setting up example records""" self.xml_example_record_with_fmt = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="FMT" ind1=" " ind2=" "> <subfield code="f">HB</subfield> <subfield code="g">Let us see if this gets inserted well.</subfield> </datafield> </record> """ self.xml_example_record_with_fft = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">file:///foo.pdf</subfield> <subfield code="a">http://bar.com/baz.ps.gz</subfield> </datafield> </record> """ self.xml_example_record_with_xyz = """ <record> <controlfield tag="001">33</controlfield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="XYZ" ind1=" " ind2=" "> <subfield code="f">HB</subfield> <subfield code="g">Let us see if this gets inserted well.</subfield> </datafield> </record> """ def test_parsing_file_containing_fmt_special_tag_with_correcting(self): """bibrecord - parsing special FMT tag, correcting on""" rec = bibrecord.create_record(self.xml_example_record_with_fmt, 1, 1)[0] self.assertEqual(rec, {u'001': [([], " ", " ", '33', 1)], 'FMT': [([('f', 'HB'), ('g', 'Let us see if this gets inserted well.')], " ", " ", "", 3)], '041': [([('a', 'eng')], " ", " ", "", 2)]}) self.assertEqual(bibrecord.record_get_field_values(rec, "041", " ", " ", "a"), ['eng']) self.assertEqual(bibrecord.record_get_field_values(rec, "FMT", " ", " ", "f"), ['HB']) self.assertEqual(bibrecord.record_get_field_values(rec, "FMT", " ", " ", "g"), ['Let us see if this gets inserted well.']) def test_parsing_file_containing_fmt_special_tag_without_correcting(self): """bibrecord - parsing 
special FMT tag, correcting off""" rec = bibrecord.create_record(self.xml_example_record_with_fmt, 1, 0)[0] self.assertEqual(rec, {u'001': [([], " ", " ", '33', 1)], 'FMT': [([('f', 'HB'), ('g', 'Let us see if this gets inserted well.')], " ", " ", "", 3)], '041': [([('a', 'eng')], " ", " ", "", 2)]}) self.assertEqual(bibrecord.record_get_field_values(rec, "041", " ", " ", "a"), ['eng']) self.assertEqual(bibrecord.record_get_field_values(rec, "FMT", " ", " ", "f"), ['HB']) self.assertEqual(bibrecord.record_get_field_values(rec, "FMT", " ", " ", "g"), ['Let us see if this gets inserted well.']) def test_parsing_file_containing_fft_special_tag_with_correcting(self): """bibrecord - parsing special FFT tag, correcting on""" rec = bibrecord.create_record(self.xml_example_record_with_fft, 1, 1)[0] self.assertEqual(rec, {u'001': [([], " ", " ", '33', 1)], 'FFT': [([('a', 'file:///foo.pdf'), ('a', 'http://bar.com/baz.ps.gz')], " ", " ", "", 3)], '041': [([('a', 'eng')], " ", " ", "", 2)]}) self.assertEqual(bibrecord.record_get_field_values(rec, "041", " ", " ", "a"), ['eng']) self.assertEqual(bibrecord.record_get_field_values(rec, "FFT", " ", " ", "a"), ['file:///foo.pdf', 'http://bar.com/baz.ps.gz']) def test_parsing_file_containing_fft_special_tag_without_correcting(self): """bibrecord - parsing special FFT tag, correcting off""" rec = bibrecord.create_record(self.xml_example_record_with_fft, 1, 0)[0] self.assertEqual(rec, {u'001': [([], " ", " ", '33', 1)], 'FFT': [([('a', 'file:///foo.pdf'), ('a', 'http://bar.com/baz.ps.gz')], " ", " ", "", 3)], '041': [([('a', 'eng')], " ", " ", "", 2)]}) self.assertEqual(bibrecord.record_get_field_values(rec, "041", " ", " ", "a"), ['eng']) self.assertEqual(bibrecord.record_get_field_values(rec, "FFT", " ", " ", "a"), ['file:///foo.pdf', 'http://bar.com/baz.ps.gz']) def test_parsing_file_containing_xyz_special_tag_with_correcting(self): """bibrecord - parsing unrecognized special XYZ tag, correcting on""" # XYZ should not get 
accepted when correcting is on; should get changed to 000 rec = bibrecord.create_record(self.xml_example_record_with_xyz, 1, 1)[0] self.assertEqual(rec, {u'001': [([], " ", " ", '33', 1)], '000': [([('f', 'HB'), ('g', 'Let us see if this gets inserted well.')], " ", " ", "", 3)], '041': [([('a', 'eng')], " ", " ", "", 2)]}) self.assertEqual(bibrecord.record_get_field_values(rec, "041", " ", " ", "a"), ['eng']) self.assertEqual(bibrecord.record_get_field_values(rec, "XYZ", " ", " ", "f"), []) self.assertEqual(bibrecord.record_get_field_values(rec, "XYZ", " ", " ", "g"), []) self.assertEqual(bibrecord.record_get_field_values(rec, "000", " ", " ", "f"), ['HB']) self.assertEqual(bibrecord.record_get_field_values(rec, "000", " ", " ", "g"), ['Let us see if this gets inserted well.']) def test_parsing_file_containing_xyz_special_tag_without_correcting(self): """bibrecord - parsing unrecognized special XYZ tag, correcting off""" # XYZ should get accepted without correcting rec = bibrecord.create_record(self.xml_example_record_with_xyz, 1, 0)[0] self.assertEqual(rec, {u'001': [([], " ", " ", '33', 1)], 'XYZ': [([('f', 'HB'), ('g', 'Let us see if this gets inserted well.')], " ", " ", "", 3)], '041': [([('a', 'eng')], " ", " ", "", 2)]}) self.assertEqual(bibrecord.record_get_field_values(rec, "041", " ", " ", "a"), ['eng']) self.assertEqual(bibrecord.record_get_field_values(rec, "XYZ", " ", " ", "f"), ['HB']) self.assertEqual(bibrecord.record_get_field_values(rec, "XYZ", " ", " ", "g"), ['Let us see if this gets inserted well.']) class BibRecordPrintingTest(InvenioTestCase): """ bibrecord - testing for printing record """ def setUp(self): """Initialize stuff""" self.xml_example_record = """ <record> <controlfield tag="001">81</controlfield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">TEST-ARTICLE-2006-001</subfield> </datafield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">ARTICLE-2006-001</subfield> </datafield> <datafield tag="245" ind1=" " 
ind2=" "> <subfield code="a">Test ti</subfield> </datafield> </record>""" self.xml_example_record_short = """ <record> <controlfield tag="001">81</controlfield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">TEST-ARTICLE-2006-001</subfield> </datafield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">ARTICLE-2006-001</subfield> </datafield> </record>""" self.xml_example_multi_records = """ <record> <controlfield tag="001">81</controlfield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">TEST-ARTICLE-2006-001</subfield> </datafield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">ARTICLE-2006-001</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">Test ti</subfield> </datafield> </record> <record> <controlfield tag="001">82</controlfield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Author, t</subfield> </datafield> </record>""" self.xml_example_multi_records_short = """ <record> <controlfield tag="001">81</controlfield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">TEST-ARTICLE-2006-001</subfield> </datafield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">ARTICLE-2006-001</subfield> </datafield> </record> <record> <controlfield tag="001">82</controlfield> </record>""" def test_record_xml_output(self): """bibrecord - xml output""" rec = bibrecord.create_record(self.xml_example_record, 1, 1)[0] rec_short = bibrecord.create_record(self.xml_example_record_short, 1, 1)[0] self.assertEqual(bibrecord.create_record(bibrecord.record_xml_output(rec, tags=[]), 1, 1)[0], rec) self.assertEqual(bibrecord.create_record(bibrecord.record_xml_output(rec, tags=["001", "037"]), 1, 1)[0], rec_short) self.assertEqual(bibrecord.create_record(bibrecord.record_xml_output(rec, tags=["037"]), 1, 1)[0], rec_short) class BibRecordCreateFieldTest(InvenioTestCase): """ bibrecord - testing for creating field """ def test_create_valid_field(self): """bibrecord - create and check a 
valid field""" bibrecord.create_field() bibrecord.create_field([('a', 'testa'), ('b', 'testb')], '2', 'n', 'controlfield', 15) def test_invalid_field_raises_exception(self): """bibrecord - exception raised when creating an invalid field""" # Invalid subfields. self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, 'subfields', '1', '2', 'controlfield', 10) self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, ('1', 'value'), '1', '2', 'controlfield', 10) self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, [('value')], '1', '2', 'controlfield', 10) self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, [('1', 'value', '2')], '1', '2', 'controlfield', 10) # Invalid indicators. self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, [], 1, '2', 'controlfield', 10) self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, [], '1', 2, 'controlfield', 10) # Invalid controlfield value self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, [], '1', '2', 13, 10) # Invalid global position self.assertRaises(bibrecord_config.InvenioBibRecordFieldError, bibrecord.create_field, [], '1', '2', 'controlfield', 'position') def test_compare_fields(self): """bibrecord - compare fields""" # Identical field0 = ([('a', 'test')], '1', '2', '', 0) field1 = ([('a', 'test')], '1', '2', '', 3) self.assertEqual(True, bibrecord._compare_fields(field0, field1, strict=True)) self.assertEqual(True, bibrecord._compare_fields(field0, field1, strict=False)) # Order of the subfields changed. 
field0 = ([('a', 'testa'), ('b', 'testb')], '1', '2', '', 0) field1 = ([('b', 'testb'), ('a', 'testa')], '1', '2', '', 3) self.assertEqual(False, bibrecord._compare_fields(field0, field1, strict=True)) self.assertEqual(True, bibrecord._compare_fields(field0, field1, strict=False)) # Different field0 = ([], '3', '2', '', 0) field1 = ([], '1', '2', '', 3) self.assertEqual(False, bibrecord._compare_fields(field0, field1, strict=True)) self.assertEqual(False, bibrecord._compare_fields(field0, field1, strict=False)) class BibRecordFindFieldTest(InvenioTestCase): """ bibrecord - testing for finding field """ def setUp(self): """Initialize stuff""" xml = """ <record> <controlfield tag="001">81</controlfield> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">TEST-ARTICLE-2006-001</subfield> <subfield code="b">ARTICLE-2007-001</subfield> </datafield> </record> """ self.rec = bibrecord.create_record(xml)[0] self.field0 = self.rec['001'][0] self.field1 = self.rec['037'][0] self.field2 = ( [self.field1[0][1], self.field1[0][0]], self.field1[1], self.field1[2], self.field1[3], self.field1[4], ) def test_finding_field_strict(self): """bibrecord - test finding field strict""" self.assertEqual((1, 0), bibrecord.record_find_field(self.rec, '001', self.field0, strict=True)) self.assertEqual((2, 0), bibrecord.record_find_field(self.rec, '037', self.field1, strict=True)) self.assertEqual((None, None), bibrecord.record_find_field(self.rec, '037', self.field2, strict=True)) def test_finding_field_loose(self): """bibrecord - test finding field loose""" self.assertEqual((1, 0), bibrecord.record_find_field(self.rec, '001', self.field0, strict=False)) self.assertEqual((2, 0), bibrecord.record_find_field(self.rec, '037', self.field1, strict=False)) self.assertEqual((2, 0), bibrecord.record_find_field(self.rec, '037', self.field2, strict=False)) class BibRecordSingletonTest(InvenioTestCase): """ bibrecord - testing singleton removal """ def setUp(self): """Initialize stuff""" 
self.xml = """<collection> <record> <controlfield tag="001">33</controlfield> <controlfield tag="002" /> <datafield tag="99" ind1=" " ind2=" "/> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a" /> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Some value</subfield> </datafield> <tagname /> </record> <record /> <collection>""" self.rec_expected = { '001': [([], ' ', ' ', '33', 1)], '100': [([('a', 'Some value')], ' ', ' ', '', 2)], } if parser_minidom_available: def test_singleton_removal_minidom(self): """bibrecord - enforcing singleton removal with minidom""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='minidom', keep_singletons=False)[0][0] self.assertEqual(rec, self.rec_expected) if parser_4suite_available: def test_singleton_removal_4suite(self): """bibrecord - enforcing singleton removal with 4suite""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='4suite', keep_singletons=False)[0][0] self.assertEqual(rec, self.rec_expected) if parser_pyrxp_available: def test_singleton_removal_pyrxp(self): """bibrecord - enforcing singleton removal with pyrxp""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='pyrxp', keep_singletons=False)[0][0] self.assertEqual(rec, self.rec_expected) if parser_lxml_available: def test_singleton_removal_lxml(self): """bibrecord - enforcing singleton removal with lxml""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='lxml', keep_singletons=False)[0][0] self.assertEqual(rec, self.rec_expected) class BibRecordNumCharRefTest(InvenioTestCase): """ bibrecord - testing numerical character reference expansion""" def setUp(self): """Initialize stuff""" self.xml = """<?xml version="1.0" encoding="UTF-8"?> <record> <controlfield tag="001">33</controlfield> <datafield tag="123" ind1=" " ind2=" "> <subfield code="a">Σ & Σ</subfield> <subfield code="a">use &amp; in XML</subfield> </datafield> </record>""" self.rec_expected = 
{ '001': [([], ' ', ' ', '33', 1)], '123': [([('a', '\xce\xa3 & \xce\xa3'), ('a', 'use & in XML'),], ' ', ' ', '', 2)], } if parser_minidom_available: def test_numcharref_expansion_minidom(self): """bibrecord - numcharref expansion with minidom""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='minidom')[0][0] self.assertEqual(rec, self.rec_expected) if parser_4suite_available: def test_numcharref_expansion_4suite(self): """bibrecord - numcharref expansion with 4suite""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='4suite')[0][0] self.assertEqual(rec, self.rec_expected) if parser_pyrxp_available: def test_numcharref_expansion_pyrxp(self): """bibrecord - but *no* numcharref expansion with pyrxp (see notes) FIXME: pyRXP does not seem to like num char ref entities, so this test is mostly left here in a TDD style in order to remind us of this fact. If we want to fix this situation, then we should probably use pyRXPU that uses Unicode strings internally, hence it is num char ref friendly. Maybe we should use pyRXPU by default, if performance is acceptable, or maybe we should introduce a flag to govern this behaviour. 
""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='pyrxp')[0][0] #self.assertEqual(rec, self.rec_expected) self.assertEqual(rec, None) if parser_lxml_available: def test_numcharref_expansion_lxml(self): """bibrecord - numcharref expansion with lxml""" rec = bibrecord.create_records(self.xml, verbose=1, correct=1, parser='lxml')[0][0] self.assertEqual(rec, self.rec_expected) class BibRecordExtractIdentifiersTest(InvenioTestCase): """ bibrecord - testing for getting identifiers from record """ def setUp(self): """Initialize stuff""" from invenio.config import CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG xml_example_record = """ <record> <controlfield tag="001">1</controlfield> <datafield tag="100" ind1="C" ind2="5"> <subfield code="a">val1</subfield> </datafield> <datafield tag="024" ind1="7" ind2=" "> <subfield code="2">doi</subfield> <subfield code="a">5555/TEST1</subfield> </datafield> <datafield tag="024" ind1="7" ind2=" "> <subfield code="2">DOI</subfield> <subfield code="a">5555/TEST2</subfield> </datafield> <datafield tag="024" ind1="7" ind2=" "> <subfield code="2">nondoi</subfield> <subfield code="a">5555/TEST3</subfield> </datafield> <datafield tag="024" ind1="8" ind2=" "> <subfield code="2">doi</subfield> <subfield code="a">5555/TEST4</subfield> </datafield> <datafield tag="%(oai_tag)s" ind1="%(oai_ind1)s" ind2="%(oai_ind2)s"> <subfield code="%(oai_subcode)s">oai:atlantis:1</subfield> </datafield> </record> """ % {'oai_tag': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[0:3], 'oai_ind1': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[3], 'oai_ind2': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[4], 'oai_subcode': CFG_BIBUPLOAD_EXTERNAL_OAIID_TAG[5], } self.rec = bibrecord.create_record(xml_example_record, 1, 1)[0] def test_extract_doi(self): """bibrecord - getting DOI identifier(s) from record""" self.assertEqual(bibrecord.record_extract_dois(self.rec), ['5555/TEST1', '5555/TEST2']) def test_extract_oai_id(self): """bibrecord - getting OAI identifier(s) from record""" 
self.assertEqual(bibrecord.record_extract_oai_id(self.rec), 'oai:atlantis:1') TEST_SUITE = make_test_suite( BibRecordSuccessTest, BibRecordParsersTest, BibRecordBadInputTreatmentTest, BibRecordGettingFieldValuesTest, BibRecordGettingFieldValuesViaWildcardsTest, BibRecordAddFieldTest, BibRecordDeleteFieldTest, BibRecordManageMultipleFieldsTest, BibRecordDeleteFieldFromTest, BibRecordAddSubfieldIntoTest, BibRecordModifyControlfieldTest, BibRecordModifySubfieldTest, BibRecordDeleteSubfieldFromTest, BibRecordMoveSubfieldTest, BibRecordCompareSubfieldTest, BibRecordAccentedUnicodeLettersTest, BibRecordSpecialTagParsingTest, BibRecordPrintingTest, BibRecordCreateFieldTest, BibRecordFindFieldTest, BibRecordDeleteSubfieldTest, BibRecordSingletonTest, BibRecordNumCharRefTest, BibRecordExtractIdentifiersTest, BibRecordDropDuplicateFieldsTest ) if __name__ == '__main__': run_test_suite(TEST_SUITE) diff --git a/invenio/modules/records/testsuite/test_record.py b/invenio/modules/records/testsuite/test_record.py index cd386ba18..c01efb3bd 100644 --- a/invenio/modules/records/testsuite/test_record.py +++ b/invenio/modules/records/testsuite/test_record.py @@ -1,623 +1,622 @@ # -*- coding: utf-8 -*- ## ## This file is part of Invenio. ## Copyright (C) 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
import os import pkg_resources from mock import patch from flask.ext.registry import PkgResourcesDirDiscoveryRegistry, \ ImportPathRegistry, RegistryProxy from invenio.base.wrappers import lazy_import from invenio.ext.registry import ModuleAutoDiscoverySubRegistry from invenio.testsuite import make_test_suite, run_test_suite, \ InvenioTestCase, nottest Record = lazy_import('invenio.modules.records.api:Record') Document = lazy_import('invenio.modules.documents.api:Document') Field_parser = lazy_import('invenio.modules.jsonalchemy.parser:FieldParser') Model_parser = lazy_import('invenio.modules.jsonalchemy.parser:ModelParser') TEST_PACKAGE = 'invenio.modules.records.testsuite' test_registry = RegistryProxy('testsuite', ImportPathRegistry, initial=[TEST_PACKAGE]) field_definitions = lambda: PkgResourcesDirDiscoveryRegistry( 'fields', registry_namespace=test_registry) model_definitions = lambda: PkgResourcesDirDiscoveryRegistry( 'models', registry_namespace=test_registry) function_proxy = lambda: ModuleAutoDiscoverySubRegistry( 'functions', registry_namespace=test_registry) class TestRecord(InvenioTestCase): """Record - demo file parsing test.""" @classmethod def setUpClass(cls): """Invalidate any previous field definition""" Field_parser._field_definitions = {} Field_parser._legacy_field_matchings = {} Model_parser._model_definitions = {} def setUp(self): """Initialize stuff""" self.app.extensions['registry']['testsuite.fields'] = field_definitions() self.app.extensions['registry']['testsuite.models'] = model_definitions() self.app.extensions['registry']['testsuite.functions'] = function_proxy() def tearDown(self): del self.app.extensions['registry']['testsuite.fields'] del self.app.extensions['registry']['testsuite.models'] del self.app.extensions['registry']['testsuite.functions'] @nottest def test_records_created(self): """Record - demo file how many records are created.""" xmltext = pkg_resources.resource_string( 'invenio.testsuite', os.path.join('data', 
'demo_record_marc_data.xml')) recs = [record for record in Record.create_many(xmltext, master_format='marc')] self.assertEqual(141, len(recs)) def test_accented_unicode_letterst_test(self): """Record - accented Unicode letters.""" xml = '''<record> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Döè1, John</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">Пушкин</subfield> </datafield> </record> ''' rec = Record.create(xml, master_format='marc', namespace='testsuite') self.assertEquals(rec['authors[0].full_name'], 'Döè1, John') self.assertEquals(rec['title.title'], 'Пушкин') def test_create_empty_record(self): """Record - Create empty record.""" rec = Record(master_format='marc', namespace='testsuite') self.assertTrue('__meta_metadata__' in rec) self.assertEquals(list(rec.keys()), ['__meta_metadata__']) rec['title'] = {'title': 'My title'} self.assertTrue('title' in rec) self.assertTrue('title' in rec['__meta_metadata__']) rec.set('title', {'title': 'Second title?'}, extend=True) self.assertEquals(len(rec['title']), 2) def test_validate(self): """Record - Validate record.""" rec = Record(master_format='marc', namespace='testsuite') self.assertTrue('__meta_metadata__' in rec) self.assertTrue('recid' in rec.validate()) rec['recid'] = '1' self.assertTrue('recid' in rec.validate()) self.assertEquals(rec.validate()['recid'], 'must be of integer type') class TestLegacyExport(InvenioTestCase): """Record - Legacy methods test.""" def setUp(self): """Initialize stuff""" self.app.extensions['registry']['testsuite.fields'] = field_definitions() self.app.extensions['registry']['testsuite.models'] = model_definitions() self.app.extensions['registry']['testsuite.functions'] = function_proxy() def tearDown(self): del self.app.extensions['registry']['testsuite.fields'] del self.app.extensions['registry']['testsuite.models'] del 
self.app.extensions['registry']['testsuite.functions'] def test_legacy_export_marcxml(self): """Record - legacy export marxml.""" # FIXME: use a better way to compare from invenio.legacy.bibrecord import create_record, records_identical blob = ''' <record> <controlfield tag="001">8</controlfield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Efstathiou, G P</subfield> <subfield code="u">Cambridge University</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">Title</subfield> <subfield code="b">SubTitle</subfield> </datafield> <datafield tag="700" ind1=" " ind2=" "> <subfield code="a">Lasenby, A N</subfield> </datafield> <datafield tag="980" ind1=" " ind2=" "> <subfield code="a">Articles</subfield> </datafield> </record> ''' rec = Record.create(blob, master_format='marc', namespace='testsuite') recstruct, _, _ = create_record(blob) json_recstruct, _, _ = create_record(rec.legacy_export_as_marc()) self.assertTrue(records_identical(json_recstruct, recstruct, ignore_subfield_order=True)) def test_legacy_create_recstruct(self): """Record - create recstruct.""" from invenio.legacy.bibrecord import create_record, records_identical blob = ''' <record> <controlfield tag="001">8</controlfield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Efstathiou, G P</subfield> <subfield code="u">Cambridge University</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">Title</subfield> <subfield code="b">SubTitle</subfield> </datafield> <datafield tag="700" ind1=" " ind2=" "> <subfield code="a">Lasenby, A N</subfield> </datafield> <datafield tag="980" ind1=" " ind2=" "> <subfield code="a">Articles</subfield> </datafield> </record> ''' rec = Record.create(blob, master_format='marc', namespace='testsuite') json_recstruct = rec.legacy_create_recstruct() recstruct, _, _ = create_record(blob) self.assertTrue(records_identical(json_recstruct, recstruct, ignore_subfield_order=True)) class 
TestMarcRecordCreation(InvenioTestCase): """Records from marc.""" @classmethod def setUpClass(cls): """Invalidate any previous field definition""" Field_parser._field_definitions = {} Field_parser._legacy_field_matchings = {} Model_parser._model_definitions = {} def setUp(self): """Initialize stuff""" self.app.extensions['registry']['testsuite.fields'] = field_definitions() self.app.extensions['registry']['testsuite.models'] = model_definitions() def tearDown(self): del self.app.extensions['registry']['testsuite.fields'] del self.app.extensions['registry']['testsuite.models'] def test_rec_json_creation_from_marcxml(self): """Record - recjson from marcxml""" xml = """ <record> <datafield tag="037" ind1=" " ind2=" "> <subfield code="a">astro-ph/9812226</subfield> </datafield> <datafield tag="041" ind1=" " ind2=" "> <subfield code="a">eng</subfield> </datafield> <datafield tag="100" ind1=" " ind2=" "> <subfield code="a">Efstathiou, G P</subfield> <subfield code="u">Cambridge University</subfield> </datafield> <datafield tag="245" ind1=" " ind2=" "> <subfield code="a">Constraints on $\Omega_{\Lambda}$ and $\Omega_{m}$from Distant Type 1a Supernovae and Cosmic Microwave Background Anisotropies</subfield> </datafield> <datafield tag="260" ind1=" " ind2=" "> <subfield code="c">14 Dec 1998</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">6 p</subfield> </datafield> <datafield tag="520" ind1=" " ind2=" "> <subfield code="a">We perform a combined likelihood analysis of the latest cosmic microwave background anisotropy data and distant Type 1a Supernova data of Perlmutter etal (1998a). Our analysis is restricted tocosmological models where structure forms from adiabatic initial fluctuations characterised by a power-law spectrum with negligible tensor component. 
Marginalizing over other parameters, our bestfit solution gives Omega_m = 0.25 (+0.18, -0.12) and Omega_Lambda = 0.63 (+0.17, -0.23) (95 % confidence errors) for the cosmic densities contributed by matter and a cosmological constantrespectively. The results therefore strongly favour a nearly spatially flat Universe with a non-zero cosmological constant.</subfield> </datafield> <datafield tag="595" ind1=" " ind2=" "> <subfield code="a">LANL EDS</subfield> </datafield> <datafield tag="650" ind1="1" ind2="7"> <subfield code="2">SzGeCERN</subfield> <subfield code="a">Astrophysics and Astronomy</subfield> </datafield> <datafield tag="700" ind1=" " ind2=" "> <subfield code="a">Lasenby, A N</subfield> </datafield> <datafield tag="700" ind1=" " ind2=" "> <subfield code="a">Hobson, M P</subfield> </datafield> <datafield tag="700" ind1=" " ind2=" "> <subfield code="a">Ellis, R S</subfield> </datafield> <datafield tag="700" ind1=" " ind2=" "> <subfield code="a">Bridle, S L</subfield> </datafield> <datafield tag="856" ind1="0" ind2=" "> <subfield code="f">George Efstathiou <gpe@ast.cam.ac.uk></subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">http://invenio-software.org/download/invenio-demo-site-files/9812226.pdf</subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">http://invenio-software.org/download/invenio-demo-site-files/9812226.fig1.ps.gz</subfield> <subfield code="t">Additional</subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">http://invenio-software.org/download/invenio-demo-site-files/9812226.fig3.ps.gz</subfield> <subfield code="t">Additional</subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">http://invenio-software.org/download/invenio-demo-site-files/9812226.fig5.ps.gz</subfield> <subfield code="t">Additional</subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield 
code="a">http://invenio-software.org/download/invenio-demo-site-files/9812226.fig6.ps.gz</subfield> <subfield code="t">Additional</subfield> </datafield> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">http://invenio-software.org/download/invenio-demo-site-files/9812226.fig7.ps.gz</subfield> <subfield code="t">Additional</subfield> </datafield> <datafield tag="909" ind1="C" ind2="0"> <subfield code="y">1998</subfield> </datafield> <datafield tag="909" ind1="C" ind2="0"> <subfield code="b">11</subfield> </datafield> <datafield tag="909" ind1="C" ind2="1"> <subfield code="c">1998-12-14</subfield> <subfield code="l">50</subfield> <subfield code="m">2001-04-07</subfield> <subfield code="o">BATCH</subfield> </datafield> <datafield tag="909" ind1="C" ind2="4"> <subfield code="p">Mon. Not. R. Astron. Soc.</subfield> </datafield> <datafield tag="909" ind1="C" ind2="O"> <subfield code="i">SLAC</subfield> <subfield code="s">4162242</subfield> </datafield> <datafield tag="909" ind1="C" ind2="5"> <subfield code="b">CER</subfield> </datafield> <datafield tag="909" ind1="C" ind2="S"> <subfield code="s">n</subfield> <subfield code="w">200231</subfield> </datafield> <datafield tag="980" ind1=" " ind2=" "> <subfield code="a">PREPRINT</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Bond, J.R. 1996, Theory and Observations of the Cosmic Background Radiation, in "Cosmology and Large Scale Structure", Les Houches Session LX, August 1993, eds. R. Schaeffer, J. Silk, M. Spiro and J. Zinn-Justin, Elsevier SciencePress, Amsterdam, p469</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Bond J.R., Efstathiou G., Tegmark M., 1997</subfield> <subfield code="p">L33</subfield> <subfield code="t">Mon. Not. R. Astron. Soc.</subfield> <subfield code="v">291</subfield> <subfield code="y">1997</subfield> <subfield code="s">Mon. Not. R. Astron. Soc. 
291 (1997) L33</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Bond, J.R., Jaffe, A. 1997, in Proc. XXXI Rencontre de Moriond, ed. F. Bouchet, Edition Fronti eres, in press</subfield> <subfield code="r">astro-ph/9610091</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Bond J.R., Jaffe A.H. and Knox L.E., 1998</subfield> <subfield code="r">astro-ph/9808264</subfield> <subfield code="s">Astrophys.J. 533 (2000) 19</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Burles S., Tytler D., 1998a, to appear in the Proceedings of the Second Oak Ridge Symposium on Atomic & Nuclear Astrophysics, ed. A. Mezzacappa, Institute of Physics, Bristol</subfield> <subfield code="r">astro-ph/9803071</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Burles S., Tytler D., 1998b, Astrophys. J.in press</subfield> <subfield code="r">astro-ph/9712109</subfield> <subfield code="s">Astrophys.J. 507 (1998) 732</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Caldwell, R.R., Dave, R., Steinhardt P.J., 1998</subfield> <subfield code="p">1582</subfield> <subfield code="t">Phys. Rev. Lett.</subfield> <subfield code="v">80</subfield> <subfield code="y">1998</subfield> <subfield code="s">Phys. Rev. Lett. 80 (1998) 1582</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Carroll S.M., Press W.H., Turner E.L., 1992, Ann. Rev. Astr. Astrophys., 30, 499. Chaboyer B., 1998</subfield> <subfield code="r">astro-ph/9808200</subfield> <subfield code="s">Phys.Rept. 307 (1998) 23</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Devlin M.J., De Oliveira-Costa A., Herbig T., Miller A.D., Netterfield C.B., Page L., Tegmark M., 1998, submitted to Astrophys. J</subfield> <subfield code="r">astro-ph/9808043</subfield> <subfield code="s">Astrophys. J. 
509 (1998) L69-72</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Efstathiou G. 1996, Observations of Large-Scale Structure in the Universe, in "Cosmology and Large Scale Structure", Les Houches Session LX, August 1993, eds. R. Schaeffer, J. Silk, M. Spiro and J. Zinn-Justin, Elsevier SciencePress, Amsterdam, p135. Efstathiou G., Bond J.R., Mon. Not. R. Astron. Soc.in press</subfield> <subfield code="r">astro-ph/9807130</subfield> <subfield code="s">Astrophys. J. 518 (1999) 2-23</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Evrard G., 1998, submitted to Mon. Not. R. Astron. Soc</subfield> <subfield code="r">astro-ph/9701148</subfield> <subfield code="s">Mon.Not.Roy.Astron.Soc. 292 (1997) 289</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Freedman J.B., Mould J.R., Kennicutt R.C., Madore B.F., 1998</subfield> <subfield code="r">astro-ph/9801090</subfield> <subfield code="s">Astrophys. J. 480 (1997) 705</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Garnavich P.M. et al. 1998</subfield> <subfield code="r">astro-ph/9806396</subfield> <subfield code="s">Astrophys.J. 509 (1998) 74-79</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Goobar A., Perlmutter S., 1995</subfield> <subfield code="p">14</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">450</subfield> <subfield code="y">1995</subfield> <subfield code="s">Astrophys. J. 450 (1995) 14</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Hamuy M., Phillips M.M., Maza J., Suntzeff N.B., Schommer R.A., Aviles R. 1996</subfield> <subfield code="p">2391</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">112</subfield> <subfield code="y">1996</subfield> <subfield code="s">Astrophys. J. 
112 (1996) 2391</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Hancock S., Gutierrez C.M., Davies R.D., Lasenby A.N., Rocha G., Rebolo R., Watson R.A., Tegmark M., 1997</subfield> <subfield code="p">505</subfield> <subfield code="t">Mon. Not. R. Astron. Soc.</subfield> <subfield code="v">298</subfield> <subfield code="y">1997</subfield> <subfield code="s">Mon. Not. R. Astron. Soc. 298 (1997) 505</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Hancock S., Rocha G., Lasenby A.N., Gutierrez C.M., 1998</subfield> <subfield code="p">L1</subfield> <subfield code="t">Mon. Not. R. Astron. Soc.</subfield> <subfield code="v">294</subfield> <subfield code="y">1998</subfield> <subfield code="s">Mon. Not. R. Astron. Soc. 294 (1998) L1</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Herbig T., De Oliveira-Costa A., Devlin M.J., Miller A.D., Page L., Tegmark M., 1998, submitted to Astrophys. J</subfield> <subfield code="r">astro-ph/9808044</subfield> <subfield code="s">Astrophys.J. 509 (1998) L73-76</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Lineweaver C.H., 1998. Astrophys. J.505, L69. Lineweaver, C.H., Barbosa D., 1998a</subfield> <subfield code="p">624</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">446</subfield> <subfield code="y">1998</subfield> <subfield code="s">Astrophys. J. 446 (1998) 624</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Lineweaver, C.H., Barbosa D., 1998b</subfield> <subfield code="p">799</subfield> <subfield code="t">Astron. Astrophys.</subfield> <subfield code="v">329</subfield> <subfield code="y">1998</subfield> <subfield code="s">Astron. Astrophys. 329 (1998) 799</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">De Oliveira-Costa A., Devlin M.J., Herbig T., Miller A.D., Netterfield C.B. 
Page L., Tegmark M., 1998, submitted to Astrophys. J</subfield> <subfield code="r">astro-ph/9808045</subfield> <subfield code="s">Astrophys. J. 509 (1998) L77-80</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Ostriker J.P., Steinhardt P.J., 1995</subfield> <subfield code="p">600</subfield> <subfield code="t">Nature</subfield> <subfield code="v">377</subfield> <subfield code="y">1995</subfield> <subfield code="s">Nature 377 (1995) 600</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Peebles P.J.E., 1993, Principles of Physical Cosmology, Princeton University Press, Princeton, New Jersey. Perlmutter S, et al., 1995, In Presentations at the NATO ASI in Aiguablava, Spain, LBL-38400; also published in Thermonuclear Supernova, P. Ruiz-Lapuente, R. Cana and J. Isern (eds), Dordrecht, Kluwer, 1997, p749. Perlmutter S, et al., 1997</subfield> <subfield code="p">565</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">483</subfield> <subfield code="y">1997</subfield> <subfield code="s">Astrophys. J. 483 (1997) 565</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Perlmutter S. et al., 1998a, Astrophys. J.in press. (P98)</subfield> <subfield code="r">astro-ph/9812133</subfield> <subfield code="s">Astrophys. J. 517 (1999) 565-586</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Perlmutter S. 
et al., 1998b, In Presentation at the January 1988 Meeting of the American Astronomical Society, Washington D.C., LBL-42230, available at www-supernova.lbl.gov; B.A.A.S., volume : 29 (1997) 1351Perlmutter S, et al., 1998c</subfield> <subfield code="p">51</subfield> <subfield code="t">Nature</subfield> <subfield code="v">391</subfield> <subfield code="y">1998</subfield> <subfield code="s">Nature 391 (1998) 51</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Ratra B., Peebles P.J.E., 1988</subfield> <subfield code="p">3406</subfield> <subfield code="t">Phys. Rev., D</subfield> <subfield code="v">37</subfield> <subfield code="y">1988</subfield> <subfield code="s">Phys. Rev. D 37 (1988) 3406</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Riess A. et al. 1998, Astrophys. J.in press</subfield> <subfield code="r">astro-ph/9805201</subfield> <subfield code="s">Astron. J. 116 (1998) 1009-1038</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Seljak U., Zaldarriaga M. 1996</subfield> <subfield code="p">437</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">469</subfield> <subfield code="y">1996</subfield> <subfield code="s">Astrophys. J. 469 (1996) 437</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Seljak U. & Zaldarriaga M., 1998</subfield> <subfield code="r">astro-ph/9811123</subfield> <subfield code="s">Phys. Rev. D60 (1999) 043504</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Tegmark M., 1997</subfield> <subfield code="p">3806</subfield> <subfield code="t">Phys. Rev. Lett.</subfield> <subfield code="v">79</subfield> <subfield code="y">1997</subfield> <subfield code="s">Phys. Rev. Lett. 79 (1997) 3806</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Tegmark M. 1998, submitted to Astrophys. 
J</subfield> <subfield code="r">astro-ph/9809201</subfield> <subfield code="s">Astrophys. J. 514 (1999) L69-L72</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Tegmark, M., Eisenstein D.J., Hu W., Kron R.G., 1998</subfield> <subfield code="r">astro-ph/9805117</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Wambsganss J., Cen R., Ostriker J.P., 1998</subfield> <subfield code="p">29</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">494</subfield> <subfield code="y">1998</subfield> <subfield code="s">Astrophys. J. 494 (1998) 29</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Webster M., Bridle S.L., Hobson M.P., Lasenby A.N., Lahav O., Rocha, G., 1998, Astrophys. J.in press</subfield> <subfield code="r">astro-ph/9802109</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">White M., 1998, Astrophys. J.in press</subfield> <subfield code="r">astro-ph/9802295</subfield> <subfield code="s">Astrophys. J. 506 (1998) 495</subfield> </datafield> <datafield tag="999" ind1="C" ind2="5"> <subfield code="m">Zaldarriaga, M., Spergel D.N., Seljak U., 1997</subfield> <subfield code="p">1</subfield> <subfield code="t">Astrophys. J.</subfield> <subfield code="v">488</subfield> <subfield code="y">1997</subfield> <subfield code="s">Astrophys. J. 
488 (1997) 1</subfield> </datafield> </record> """ r = Record.create(xml, master_format='marc', namespace='testsuite', schema='xml') self.assertEquals(r.additional_info.master_format, 'marc') self.assertTrue('authors' in r) self.assertEquals(r['authors[0].full_name'], "Efstathiou, G P") self.assertEquals(len(r['authors']), 5) self.assertTrue('title.title' in r) self.assertEquals(r['title.title'], "Constraints on $\Omega_{\Lambda}$ and $\Omega_{m}$from Distant Type 1a Supernovae and Cosmic Microwave Background Anisotropies") self.assertTrue('abstract.summary' in r) self.assertEquals(r['abstract.summary'], "We perform a combined likelihood analysis of the latest cosmic microwave background anisotropy data and distant Type 1a Supernova data of Perlmutter etal (1998a). Our analysis is restricted tocosmological models where structure forms from adiabatic initial fluctuations characterised by a power-law spectrum with negligible tensor component. Marginalizing over other parameters, our bestfit solution gives Omega_m = 0.25 (+0.18, -0.12) and Omega_Lambda = 0.63 (+0.17, -0.23) (95 % confidence errors) for the cosmic densities contributed by matter and a cosmological constantrespectively. 
The results therefore strongly favour a nearly spatially flat Universe with a non-zero cosmological constant.") self.assertTrue('reference' in r) self.assertEquals(len(r['reference']), 36) def test_error_catching(self): """ Record - catch any record conversion issues """ from invenio.modules.jsonalchemy.errors import ReaderException + from invenio.legacy.bibrecord import _select_parser blob = """<?xml version="1.0" encoding="UTF-8"?> <collection> <record> <datafield tag="FFT" ind1=" " ind2=" "> <subfield code="a">/path/to</subfield> <subfield code="t">Test</subfield> </record> </collection> """ - self.assertRaises( - ReaderException, - Record.create, - blob, - master_format='marc', - namespace='testsuite', - schema='xml' - ) + # lxml is super resilient to a tag soup, it won't fail on such a simple + # mistake. + if _select_parser() != 'lxml': + with self.assertRaises(ReaderException): + Record.create(blob, master_format='marc', + namespace='testsuite', schema='xml') class TestRecordDocuments(InvenioTestCase): """Test record doccuments behaviour.""" def setUp(self): self.app.config['DOCUMENTS_ENGINE'] = \ "invenio.modules.jsonalchemy.jsonext.engines.memory:MemoryStorage" @patch('invenio.legacy.search_engine.check_user_can_view_record') def test_restricted_record_non_restricted_document( self, check_user_can_view_record_patch): """Record - Restrcited access to record documents.""" d = Document.create({'title': 'Document 1', 'description': 'Testing 1', 'restriction': {'email': 'user@invenio.org'}, 'recids': [1,2,3], }, model='record_document_base') user_info = {'email': 'user@invenio.org', 'uid': -1} self.app.config['RECORD_DOCUMENT_VIEWRESTR_POLICY'] = 'ANY' check_user_can_view_record_patch.return_value = (0, '') self.assertEquals(d.is_authorized(user_info)[0], 0) check_user_can_view_record_patch.return_value = (1, '') self.assertEquals(d.is_authorized(user_info)[0], 1) check_user_can_view_record_patch.side_effect = \ lambda user_info, recid: (recid%2, '') # At least 
one record must be authorized self.assertEquals(d.is_authorized(user_info)[0], 0) # All records must be authorized self.app.config['RECORD_DOCUMENT_VIEWRESTR_POLICY'] = 'ALL' self.assertEquals(d.is_authorized(user_info)[0], 1) check_user_can_view_record_patch.side_effect = None check_user_can_view_record_patch.return_value = (0, '') self.assertEquals(d.is_authorized(user_info)[0], 0) TEST_SUITE = make_test_suite(TestRecord, TestMarcRecordCreation, TestRecordDocuments) if __name__ == '__main__': run_test_suite(TEST_SUITE) diff --git a/requirements.txt b/requirements.txt index 65fd0c1e6..d3e9d295c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,8 @@ -e git+git://github.com/lnielsen-cern/dictdiffer.git#egg=dictdiffer -e git+https://github.com/david-e/flask-admin.git@bootstrap3#egg=Flask-Admin -e git+https://github.com/mitsuhiko/flask-sqlalchemy#egg=Flask-SQLAlchemy-dev # requires numpy before it can be installed #-e svn://svn.code.sf.net/p/gnuplot-py/code/trunk#egg=gnuplot-py -https://www.reportlab.com/ftp/pyRXP-1.16-daily-unix.tar.gz#egg=pyRXP -e git+git://github.com/romanchyla/workflow.git@e41299579501704b1486c72cc2509a9f82e63ea6#egg=workflow -e .[development] diff --git a/setup.py b/setup.py index fca9f59a1..1fd68df70 100644 --- a/setup.py +++ b/setup.py @@ -1,275 +1,285 @@ # -*- coding: utf-8 -*- ## This file is part of Invenio. ## Copyright (C) 2013, 2014 CERN. ## ## Invenio is free software; you can redistribute it and/or ## modify it under the terms of the GNU General Public License as ## published by the Free Software Foundation; either version 2 of the ## License, or (at your option) any later version. ## ## Invenio is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License for more details. 
## ## You should have received a copy of the GNU General Public License ## along with Invenio; if not, write to the Free Software Foundation, Inc., ## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. """ Invenio is Fun. Links ----- * `website <http://invenio-software.org/>`_ * `documentation <http://invenio.readthedocs.org/en/latest/>`_ * `development version <https://github.com/inveniosoftware/invenio>`_ """ import os import sys from setuptools import setup, find_packages -from setuptools.command.install_lib import install_lib as _install_lib -from distutils.command.build import build as _build +from setuptools.command.install_lib import install_lib +from distutils.command.build import build -class build(_build): +class _build(build): """Compile catalog before building the package.""" - sub_commands = [('compile_catalog', None)] + _build.sub_commands + sub_commands = [('compile_catalog', None)] + build.sub_commands -class install_lib(_install_lib): +class _install_lib(install_lib): - """Compile catalog before runing installation command.""" + """Custom install_lib command.""" def run(self): + """Compile catalog before running installation command.""" self.run_command('compile_catalog') - _install_lib.run(self) + install_lib.run(self) install_requires = [ "alembic==0.6.2", "Babel==1.3", "BeautifulSoup==3.2.1", "BeautifulSoup4==4.3.2", "celery==3.1.12", "Cerberus==0.7", "dictdiffer==0.0.3", "feedparser==5.1.3", "fixture==1.5", "Flask==0.10.1", "Flask-Admin==1.0.7", "Flask-Assets==0.10", "Flask-Babel==0.9", "Flask-Breadcrumbs==0.1", "Flask-Cache==0.12", "Flask-Collect>=0.2.3", "Flask-Email==1.4.4", "Flask-Gravatar==0.4.0", "Flask-Login==0.2.7", "Flask-Menu==0.1", "Flask-OAuthlib==0.4.3", "Flask-Principal==0.4.0", "Flask-Registry>0.1", "Flask-RESTful==0.2.12", "Flask-Script>=2.0.5", "Flask-SQLAlchemy>1.9", "Flask-WTF==0.9.5", "fs==0.4.0", "intbitset==2.0", "jellyfish>=0.3.1", "Jinja2==2.7.3", "libmagic==1.0", "lxml>=3.3", "mechanize==0.2.5", 
"msgpack-python==0.3.0", "MySQL-python==1.2.5", "numpy==1.7.0", "pyparsing==2.0.1", "python-twitter==0.8.7", "pyPDF==1.13", "pyPDF2", "PyLD>=0.5.2", - "pyRXP==1.16", "pyStemmer==1.3.0", # python-dateutil>=2.0 is only for Python3 "python-dateutil>=1.5,<2.0", "python-magic==0.4.6", "pytz", "rauth", "raven==4.2.1", "rdflib==2.4.2", "redis==2.8.0", # Is it explicitly required? "reportlab==2.5", "requests==1.2.3", "six>=1.7.2", "Sphinx", "SQLAlchemy==0.8.3", "SQLAlchemy-Utils>=0.23.5,<0.24", "unidecode", "workflow==1.1.0", # Flask-WTF 0.9.5 doesn't support WTForms 2.0 as of yet. "WTForms>=1.0.5,<2.0", "wtforms-alchemy==0.12.6" ] extras_require = { "docs": [ "sphinx_rtd_theme" ], "development": [ "Flask-DebugToolbar==0.9.0", ], "elasticsearch": [ "pyelasticsearch>=0.6.1" ], "img": [ "qrcode", "Pillow" ], "mongo": [ "pymongo" ], "misc": [ # was requirements-extras "apiclient", # extra=cloud? "dropbox", # extra=cloud? "gnuplot-py==1.8", "flake8", # extra=kwalitee? "pep8", # extra=kwalitee? "pychecker==0.8.19", # extra=kwalitee? "pylint", # extra=kwalitee? "nosexcover", # test? "oauth2client", # extra=cloud? "python-onedrive", # extra=cloud? "python-openid", # extra=sso? "urllib3", # extra=cloud? ], "sso": [ "Flask-SSO>=0.1" + ], + # Alternative XML parsers + # + # For pyRXP, the version on PyPI may not be the right one. + # + # $ pip install + # > https://www.reportlab.com/ftp/pyRXP-1.16-daily-unix.tar.gz#egg=pyRXP + "xml.parsers": [ + "pyRXP==1.16-daily-unix", + "4suite" ] } extras_require["docs"] += extras_require["elasticsearch"] extras_require["docs"] += extras_require["img"] extras_require["docs"] += extras_require["mongo"] extras_require["docs"] += extras_require["sso"] tests_require = [ "httpretty==0.8.0", "Flask-Testing==0.4.1", "mock", "nose", "selenium", "unittest2==0.5.1", ] # Compatibility with Python 2.6 if sys.version_info < (2, 7): install_requires += [ "argparse", "importlib" ] # Get the version string. Cannot be done with import! 
# Execute version.py in a scratch namespace and read ``__version__`` from it,
# so the ``invenio`` package itself is never imported here.
g = {}
with open(os.path.join("invenio", "version.py"), "rt") as fp:
    exec(fp.read(), g)
version = g["__version__"]

# The ``docs`` directory is excluded from discovery and shipped instead as
# the ``invenio_docs`` package (see ``package_dir`` below).
packages = find_packages(exclude=['docs'])
packages.append('invenio_docs')

setup(
    name='Invenio',
    version=version,
    url='https://github.com/inveniosoftware/invenio',
    license='GPLv2',
    author='CERN',
    author_email='info@invenio-software.org',
    description='Digital library software',
    long_description=__doc__,
    packages=packages,
    package_dir={'invenio_docs': 'docs'},
    include_package_data=True,
    zip_safe=False,
    platforms='any',
    entry_points={
        # Every console script must use the ``name = module:callable`` form.
        'console_scripts': [
            'inveniomanage = invenio.base.manage:main',
            'plotextractor = invenio.utils.scripts.plotextractor:main',
            # Legacy
            'alertengine = invenio.legacy.webalert.scripts.alertengine:main',
            'batchuploader = invenio.legacy.bibupload.scripts.batchuploader:main',
            'bibcircd = invenio.legacy.bibcirculation.scripts.bibcircd:main',
            'bibauthorid = invenio.legacy.bibauthorid.scripts.bibauthorid:main',
            'bibclassify = invenio.modules.classifier.scripts.classifier:main',
            'bibconvert = invenio.legacy.bibconvert.scripts.bibconvert:main',
            'bibdocfile = invenio.legacy.bibdocfile.scripts.bibdocfile:main',
            'bibedit = invenio.legacy.bibedit.scripts.bibedit:main',
            'bibencode = invenio.modules.encoder.scripts.encoder:main',
            'bibindex = invenio.legacy.bibindex.scripts.bibindex:main',
            'bibmatch = invenio.legacy.bibmatch.scripts.bibmatch:main',
            'bibrank = invenio.legacy.bibrank.scripts.bibrank:main',
            'bibrankgkb = invenio.legacy.bibrank.scripts.bibrankgkb:main',
            'bibreformat = invenio.legacy.bibformat.scripts.bibreformat:main',
            'bibsort = invenio.legacy.bibsort.scripts.bibsort:main',
            'bibsched = invenio.legacy.bibsched.scripts.bibsched:main',
            'bibstat = invenio.legacy.bibindex.scripts.bibstat:main',
            'bibtaskex = invenio.legacy.bibsched.scripts.bibtaskex:main',
            'bibtasklet = invenio.legacy.bibsched.scripts.bibtasklet:main',
            'bibupload = invenio.legacy.bibupload.scripts.bibupload:main',
            'dbexec = invenio.legacy.miscutil.scripts.dbexec:main',
            'dbdump = invenio.legacy.miscutil.scripts.dbdump:main',
            'docextract = invenio.legacy.docextract.scripts.docextract:main',
            'elmsubmit = invenio.legacy.elmsubmit.scripts.elmsubmit:main',
            'gotoadmin = invenio.modules.redirector.scripts.redirector:main',
            'inveniocfg = invenio.legacy.inveniocfg:main',
            'inveniogc = invenio.legacy.websession.scripts.inveniogc:main',
            'inveniounoconv = invenio.legacy.websubmit.scripts.inveniounoconv:main',
            'oaiharvest = invenio.legacy.oaiharvest.scripts.oaiharvest:main',
            'oairepositoryupdater = invenio.legacy.oairepository.scripts.oairepositoryupdater:main',
            'arxiv-pdf-checker = invenio.legacy.pdfchecker:main',
            'refextract = invenio.legacy.refextract.scripts.refextract:main',
            'textmarc2xmlmarc = invenio.legacy.bibrecord.scripts.textmarc2xmlmarc:main',
            'webaccessadmin = invenio.modules.access.scripts.webaccessadmin:main',
            'webauthorprofile = invenio.legacy.webauthorprofile.scripts.webauthorprofile:main',
            'webcoll = invenio.legacy.websearch.scripts.webcoll:main',
            'webmessageadmin = invenio.legacy.webmessage.scripts.webmessageadmin:main',
            'webstatadmin = invenio.legacy.webstat.scripts.webstatadmin:main',
            'websubmitadmin = invenio.legacy.websubmit.scripts.websubmitadmin:main',
            'xmlmarc2textmarc = invenio.legacy.bibrecord.scripts.xmlmarc2textmarc:main',
            'xmlmarclint = invenio.legacy.bibrecord.scripts.xmlmarclint:main',
        ],
        "distutils.commands": [
            "inveniomanage = invenio.base.setuptools:InvenioManageCommand",
        ]
    },
    install_requires=install_requires,
    extras_require=extras_require,
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        # Registered trove classifier; the file header licenses the code
        # under GPL version 2 "or (at your option) any later version".
        'License :: OSI Approved :: '
        'GNU General Public License v2 or later (GPLv2+)',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
    ],
    test_suite='invenio.testsuite.suite',
    tests_require=tests_require,
    # Custom commands that compile the message catalog first.
    cmdclass={
        'build': _build,
        'install_lib': _install_lib,
    },
)