Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F87736031
marc_reader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Oct 14, 15:40
Size
7 KB
Mime Type
text/x-python
Expires
Wed, Oct 16, 15:40 (2 d)
Engine
blob
Format
Raw Data
Handle
21647899
Attached To
R3600 invenio-infoscience
marc_reader.py
View Options
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2013, 2014 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Marc reader.
This reader (or any) shouldn't be used directly, at least in the normal use
cases. Instead :class:`~invenio.modules.jsonalchemy.readers.Reader` should be
used with the explicit format.
"""
import
copy
import
re
from
six
import
iteritems
from
werkzeug.utils
import
import_string
from
invenio.base.globals
import
cfg
from
invenio.base.utils
import
try_to_eval
from
invenio.modules.jsonalchemy.reader
import
Reader
from
invenio.modules.jsonalchemy.errors
import
ReaderException
from
invenio.modules.jsonalchemy.registry
import
functions
class MarcReader(Reader):
    """Reader that turns MARCXML blobs into the intermediate ``rec_tree``.

    ``rec_tree`` is a dictionary keyed by MARC tag (see :meth:`_prepare_blob`)
    that the other methods of this class query when guessing the model and
    when applying the field rules.
    """

    __master_format__ = 'marc'

    # Non-greedy, DOTALL so that a single record may span several lines.
    split_marc = re.compile(r'<record.*?>.*?</record>', re.DOTALL)

    @staticmethod
    def split_blob(blob, schema=None, **kwargs):
        """Yield every ``<record ...>...</record>`` chunk found in ``blob``.

        Note 1: Taken from invenio.legacy.bibrecord:create_records
        Note 2: Use the DOTALL flag to include newlines.

        :param blob: string that may contain several MARCXML records.
        :param schema: only ``None`` and ``'xml'`` are handled; for any other
            value nothing is yielded.
        :param kwargs: accepted for interface compatibility and ignored.
        :return: generator over the matched record strings.
        """
        if schema not in (None, 'xml'):
            # A plain ``return`` ends the generator.  Raising StopIteration
            # here is converted into RuntimeError by PEP 479 (Python 3.7+).
            return
        for match in MarcReader.split_marc.finditer(blob):
            yield match.group()

    def guess_model_from_input(self):
        """Guess from the input Marc the model to be used in this record.

        This is the simplest implementation possible: it takes the ``a``
        subfield of every ``980__`` field, lower-cased, as the list of models.

        The guess function can easily be changed by setting
        ``CFG_MARC_MODEL_GUESSER`` to an importable string; the imported
        callable receives this reader instance and its result is returned
        unchanged.

        :return: a list of lower-cased ``980__a`` values when ``980__`` can be
            iterated as a list of fields; otherwise the raw ``a`` value of the
            single ``980__`` field; ``'__default__'`` when neither works.
        """
        guess_function = cfg.get('CFG_MARC_MODEL_GUESSER', None)
        if guess_function:
            return import_string(guess_function)(self)

        try:
            return [coll['a'].lower()
                    for coll in self.rec_tree.get('980__')
                    if 'a' in coll]
        except TypeError:
            # Reached when ``980__`` is missing (``None`` is not iterable) or
            # when it holds a single field instead of a list of fields.
            try:
                return self.rec_tree.get('980__', {})['a']
            except Exception:
                # Best effort: any failure means "no model information".
                # ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                return '__default__'

    def _get_elements_from_blob(self, regex_key):
        """Retrieve elements from the intermediate structure.

        :param regex_key: either ``'entire_record'`` / ``'*'`` or an iterable
            of regular expressions that are matched (``re.match``) against the
            keys of ``rec_tree``.
        :return: ``(key, value)`` for the first key matched by the first
            expression that matches anything, or ``('', [])`` when nothing
            matches.  For ``'entire_record'`` / ``'*'`` the whole ``rec_tree``
            is returned instead of a tuple.
        """
        # NOTE(review): this branch returns the mapping itself, not a
        # ``(tag, elements)`` pair as the other branches do -- confirm that
        # callers unpacking the result never take this path.
        if regex_key in ('entire_record', '*'):
            return self.rec_tree

        for pattern in regex_key:
            matches = re.compile(pattern).match
            for key in self.rec_tree:
                if matches(key):
                    return (key, self.rec_tree.get(key, []))
        return ('', [])

    def _prepare_blob(self, *args, **kwargs):
        """Transform the incoming blob into recstruct.

        Parses ``self._blob`` with ``create_record`` and stores the result in
        ``self.rec_tree``:

        * numeric tags lower than ``'010'`` map to the list of the fourth
          item of each of their values;
        * every other tag is stored under ``tag + value[1] + value[2]`` with
          spaces replaced by ``_`` and maps to a dictionary of subfields.
          Repeated fields or subfields are collected into lists.

        FIXME: stop using recstruct!

        :raises ReaderException: when ``create_record`` reports status ``0``.
        """
        from invenio.legacy.bibrecord import create_record

        class SaveDict(dict):
            """Dictionary whose item access returns ``None`` if missing."""

            __getitem__ = dict.get

        def dict_extend_helper(d, key, value):
            """Store ``value`` under ``key``, accumulating repeated keys.

            If the key is already present, the stored value is turned into a
            list (when it is not one yet) and the new value is appended.
            Almost as in `list.extend`.
            """
            if key in d:
                current_value = d.get(key)
                if not isinstance(current_value, list):
                    current_value = [current_value]
                current_value.append(value)
                value = current_value
            d[key] = value

        self.rec_tree = SaveDict()

        record, status_code, errors = create_record(self._blob)
        if status_code == 0:
            # Status 0 means the parser failed; surface its messages.
            if isinstance(errors, list):
                errors = "\n".join(errors)
            raise ReaderException(
                "There was an error while parsing MARCXML: %s" % (errors,))

        for key, values in iteritems(record):
            if key < '010' and key.isdigit():
                # These tags carry their content in the fourth item of the
                # value (presumably MARC control fields -- TODO confirm).
                self.rec_tree[key] = [value[3] for value in values]
                continue
            for value in values:
                field = SaveDict()
                for subfield in value[0]:
                    dict_extend_helper(field, subfield[0], subfield[1])
                tag = (key + value[1] + value[2]).replace(' ', '_')
                dict_extend_helper(self.rec_tree, tag, field)

    def _apply_rules(self, json_id, field_name, rule):
        """Override default behavior.

        See :meth:`~invenio.modules.jsonalchemy.readers.Reader._apply_rules`.

        Tries to apply all the rules defined for marc and as soon as it can
        apply one it stops the process.
        It also keeps the marc_tag that comes from
        :meth:`._get_elements_from_blob`

        Errors raised while evaluating a rule for one element are recorded in
        ``self._json.errors`` and do not abort the processing.

        :param json_id: as in
            :meth:`~invenio.modules.jsonalchemy.readers.Reader._apply_rules`
        :param field_name: as in
            :meth:`~invenio.modules.jsonalchemy.readers.Reader._apply_rules`
        :param rule: as in
            :meth:`~invenio.modules.jsonalchemy.readers.Reader._apply_rules`
        """
        master_format = self._json.additional_info.master_format
        for field_def in rule['rules'].get(master_format, []):
            if not self._evaluate_before_decorators(field_def):
                continue
            marc_tag, elements = self._get_elements_from_blob(
                field_def['source_tags'])
            if not isinstance(elements, (list, tuple)):
                elements = (elements, )
            for element in elements:
                # Work on a copy so that pinning the concrete tag below does
                # not alter the shared rule definition.
                tmp_field_def = copy.deepcopy(field_def)
                tmp_field_def['source_tags'] = [marc_tag, ]
                if not self._evaluate_on_decorators(tmp_field_def, element):
                    continue
                try:
                    value = try_to_eval(
                        tmp_field_def['function'],
                        functions(self._json.additional_info.namespace),
                        value=element, self=self._json)
                    self._remove_none_values(value)
                    info = self._find_field_metadata(json_id, field_name,
                                                     'creator', tmp_field_def)
                    self._json['__meta_metadata__'][field_name] = info
                    self._json.__setitem__(field_name, value, extend=True,
                                           exclude=['decorators',
                                                    'extensions'])
                except Exception as e:
                    # One failing element must not stop the others; keep the
                    # error so that it can be inspected afterwards.
                    self._json.errors.append(
                        "Rule Error - Unable to apply rule for field "
                        "'%s' with value '%s'. \n%s"
                        % (field_name, element, str(e)),)
            # Stop at the first rule definition that produced the field.
            if field_name in self._json._dict_bson:
                break
# Expose the reader class under the module-level name ``reader``.
# NOTE(review): presumably this is the attribute the jsonalchemy reader
# registry looks up when it loads this module -- confirm against the registry.
reader = MarcReader
Event Timeline
Log In to Comment