Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F93753940
bibfield_reader.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Dec 1, 05:30
Size
18 KB
Mime Type
text/x-python
Expires
Tue, Dec 3, 05:30 (2 d)
Engine
blob
Format
Raw Data
Handle
22697301
Attached To
R3600 invenio-infoscience
bibfield_reader.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
BibField Reader
"""
__revision__
=
"$Id$"
import
datetime
import
six
from
invenio.importutils
import
try_to_eval
from
invenio.containerutils
import
SmartDict
from
invenio.bibfield_config_engine
import
BibFieldParser
as
FieldParser
class ReaderException(Exception):
    """Error signalled by a reader when a blob cannot be processed.

    The readers in this module raise it when an operation is requested
    without the blob (or json structure) it needs.
    """
class Reader(object):
    """
    Default reader and base class of the ``*Reader`` hierarchy.

    It holds the method implementations shared by every reader. This base
    class assumes the source format is already json, so no conversion is
    performed; subclasses are expected to implement :meth:`split_blob`,
    :meth:`_prepare_blob` and :meth:`_get_elements_from_blob` for their own
    master format.
    """

    def __init__(self, blob=None, **kwargs):
        """
        :param blob: source record to be translated (may be given later
            through :meth:`add` / :meth:`update`).
        :param kwargs: stored verbatim as the additional info of the record;
            the rule application code reads ``master_format`` from it.
        """
        self.blob = blob
        self.json = None
        self._additional_info = kwargs
        # (json_id, field_name) pairs already processed by `_unpack_rule`.
        self._parsed = []

    @staticmethod
    def split_blob(blob, schema=None, **kwargs):
        """
        Specify how to split a blob containing several records so they can be
        processed one by one afterwards.

        :raises NotImplementedError: always; must be provided by subclasses.
        """
        raise NotImplementedError()

    @property
    def field_definitions(self):
        """Field definitions as returned by ``FieldParser.field_definitions()``."""
        return FieldParser.field_definitions()

    @property
    def functions(self):
        """Functions made available to the rule expressions when evaluated."""
        # Imported lazily, as in the original code (presumably to avoid a
        # circular import -- TODO confirm).
        from invenio.bibfield_utils import CFG_BIBFIELD_FUNCTIONS
        return CFG_BIBFIELD_FUNCTIONS

    def translate(self):
        """
        Transform the incoming blob into a json structure using the rules
        described in the field definitions.

        Which rules apply depends on the type of the reader, i.e. on the
        source format or `master_format`.

        :return: Json structure (typically a dictionary)
        :raises ReaderException: if the reader has no blob.
        """
        if not self.blob:
            raise ReaderException("To perform a 'translate' operation a blob is needed")

        # If we already have the json return it, use add or update to modify it
        # NOTE(review): this cached path returns `self.json` itself while the
        # first call returns `self.json._dict` -- confirm callers cope with both.
        if self.json:
            return self.json

        self.json = {
            '__meta_metadata__': {
                '__additional_info__': self._additional_info,
                '__aliases__': {},
                '__errors__': [],
                '__continuable_errors__': [],
            }
        }

        # Model support is disabled: every defined field maps onto itself.
        names = list(self.field_definitions.keys())
        fields = dict(zip(names, names))

        self.add(self.json, self.blob, fields)
        return self.json._dict

    def add(self, json, blob, fields):
        """
        Add the given fields to the json structure.

        :param json: json structure (wrapped into a ``SmartDict`` if needed).
        :param blob: source record the values are extracted from.
        :param fields: either a dict mapping field names to json ids, an
            iterable of field names, or a single field name.
        :raises ReaderException: if either the json structure or the blob is
            missing.
        """
        self.json = json if isinstance(json, SmartDict) else SmartDict(json)
        self.blob = blob

        if not self.blob or not self.json:
            raise ReaderException("To perform an 'add' operation a json structure and a blob are needed")

        if not isinstance(fields, dict):
            if isinstance(fields, six.string_types):
                fields = (fields, )
            # Model support is disabled: each field maps onto itself.
            fields = dict(zip(fields, fields))

        self._prepare_blob()

        for field_name, json_id in fields.items():
            self._unpack_rule(json_id, field_name)

        self._post_process_json()

    def set(self, json, field):
        """
        Register the meta-metadata of `field` inside the json structure.

        The first rule found for the field (creator rule for the record's
        master format, then derived, then calculated) determines the rule
        type; when none can be found the type is ``'UNKNOWN'``.

        :param json: json structure (wrapped into a ``SmartDict`` if needed).
        :param field: name of the field being set.
        """
        self.json = json if isinstance(json, SmartDict) else SmartDict(json)

        # Model support is disabled: the json id is the field name itself.
        json_id = field

        try:
            field_def = self.field_definitions[json_id]
        except KeyError:
            field_def = {}
            self.json['__meta_metadata__']['__continuable_errors__']\
                .append("Adding a new field '%s' without definition" % (field, ))

        try:
            master_format = self.json['__meta_metadata__']['__additional_info__']['master_format']
            rules = field_def['rules']
            if master_format in rules:
                rule = rules[master_format][0]
                rule_type = 'creator'
            elif 'derived' in rules:
                rule = rules['derived'][0]
                rule_type = 'derived'
            elif 'calculated' in rules:
                rule = rules['calculated'][0]
                rule_type = 'calculated'
            else:
                rule = {}
                rule_type = 'UNKNOWN'
        except KeyError:
            # Either no master format or no rules for this field.
            rule = {}
            rule_type = 'UNKNOWN'

        # `_find_meta_metadata` expects the single rule first and the whole
        # field definition last, exactly as `_apply_rules` passes them.
        self.json['__meta_metadata__'][field] = \
            self._find_meta_metadata(json_id, field, rule_type, rule, field_def)

    def update(self, json, blob, fields=None):
        """
        Try to update the json structure with the fields given.

        If no fields are given then it will try to update all the fields
        inside the json structure.

        :param json: json structure to update.
        :param blob: source record the values are extracted from.
        :param fields: dict, iterable of names or single name (see :meth:`add`).
        :raises ReaderException: if either the json structure or the blob is
            missing.
        """
        if not json or not blob:
            raise ReaderException("To perform an 'update' operation a json structure and a blob are needed")

        if not fields:
            names = list(json.keys())
            fields = dict(zip(names, names))
        elif not isinstance(fields, dict):
            if isinstance(fields, six.string_types):
                fields = (fields, )
            fields = dict(zip(fields, fields))

        self.add(json, blob, fields)

    def validate(self, reset=True):
        """Placeholder: validation is not implemented, this does nothing."""
        pass

    def _prepare_blob(self, *args, **kwargs):
        """
        Responsible of doing any kind of transformation over the blob before
        the translation begins.

        :raises NotImplementedError: always; must be provided by subclasses.
        """
        raise NotImplementedError()

    def _get_elements_from_blob(self, regex_key):
        """
        Return the elements of the blob matching `regex_key`.

        Should handle 'entire_record' and '*'
        Not an iterator!

        :raises NotImplementedError: always; must be provided by subclasses.
        """
        raise NotImplementedError()

    def _unpack_rule(self, json_id, field_name=None):
        """
        Extract the rules of `json_id` from the field definitions and try to
        apply them.

        :return: truthy if a rule could be applied (or, for an already parsed
            field, if the field is present in the json), False otherwise.
        """
        try:
            rule_def = self.field_definitions[json_id]
        except KeyError:
            self.json['__meta_metadata__']['__continuable_errors__']\
                .append("Error - Unable to find '%s' field definition" % (json_id, ))
            return False

        # Model support is disabled, so the incoming `field_name` is ignored
        # and the json id is always used as field name.
        # NOTE(review): confirm no caller relies on a different field name.
        field_name = json_id

        # Undo the workaround for [0] and [n]
        if isinstance(rule_def, list):
            # Build the list first so every entry is processed; a lazy
            # `all(map(...))` would stop at the first failure on Python 3.
            return all([self._unpack_rule(sub_id) for sub_id in rule_def])

        # Already parsed, avoid doing it again
        if (json_id, field_name) in self._parsed:
            return field_name in self.json

        self._parsed.append((json_id, field_name))
        return self._apply_rules(json_id, field_name, rule_def) or \
            self._apply_virtual_rules(json_id, field_name, rule_def)

    def _store_creator_value(self, json_id, field_name, rule, rule_def, master_value):
        """
        Evaluate one creator rule against `master_value` and store the result.

        Any failure is recorded in the ``__errors__`` list of the json.

        :return: True if the value was stored, False if an error occurred.
        """
        try:
            value = try_to_eval(rule['value'], self.functions,
                                value=master_value, self=self.json)
            self._remove_none_values(value)
            info = self._find_meta_metadata(json_id, field_name, 'creator', rule, rule_def)
            if 'json_ext' in rule_def:
                value = rule_def['json_ext']['dumps'](value)
            self.json.set(field_name, value, extend=True)
            self.json['__meta_metadata__.%s' % (SmartDict.main_key_pattern.sub('', field_name), )] = info
        except Exception as error:
            self.json['__meta_metadata__']['__errors__']\
                .append('Rule Error - Unable to apply rule for field %s - %s' % (field_name, str(error)),)
            return False
        return True

    def _apply_rules(self, json_id, field_name, rule_def):
        """
        Try to apply the 'creator' rules defined for the record's master format.

        :return: True if the last evaluated rule produced a value.
        """
        applied = False
        master_format = self.json['__meta_metadata__']['__additional_info__']['master_format']
        for rule in rule_def['rules'].get(master_format, []):
            elements = self._get_elements_from_blob(rule['source_tag'])
            if not elements:
                self._set_default_value(json_id, field_name)
                return False
            if not self._evaluate_decorators(rule):
                return False

            if 'entire_record' in rule['source_tag'] or '*' in rule['source_tag']:
                # The rule consumes all the elements at once.
                applied = self._store_creator_value(json_id, field_name, rule, rule_def, elements)
            else:
                for element in elements:
                    if not isinstance(element, (list, tuple)):
                        element = (element, )
                    applied = False
                    for master_value in element:
                        if rule['only_if_master_value'] and \
                           not all(try_to_eval(rule['only_if_master_value'], self.functions,
                                               value=master_value, self=self.json)):
                            # Condition on the master value not met: skip it.
                            continue
                        if self._store_creator_value(json_id, field_name, rule, rule_def, master_value):
                            applied = True

            if field_name not in self.json or not applied:
                self._set_default_value(json_id, field_name)

        return applied

    def _apply_virtual_rules(self, json_id, field_name, rule_def):
        """
        Try to apply either a 'derived' or 'calculated' rule.

        Calculated rules are evaluated only when they are memoized; otherwise
        ``None`` is stored for the field.

        :return: False as soon as a decorator fails or a rule raises, True
            otherwise.
        """
        virtual_rules = (
            ('calculated', rule_def['rules'].get('calculated', [])),
            ('derived', rule_def['rules'].get('derived', [])),
        )
        for rule_type, rules in virtual_rules:
            for rule in rules:
                if not self._evaluate_decorators(rule):
                    return False
                try:
                    info = self._find_meta_metadata(json_id, field_name, rule_type, rule, rule_def)
                    if rule_type == 'derived' or rule['memoize']:
                        value = try_to_eval(rule['value'], self.functions, self=self.json)
                        if 'json_ext' in rule_def:
                            value = rule_def['json_ext']['dumps'](value)
                        self._remove_none_values(value)
                    else:
                        value = None

                    self.json.set(field_name, value, extend=True)
                    self.json['__meta_metadata__.%s' % (SmartDict.main_key_pattern.sub('', field_name), )] = info
                except Exception as error:
                    self.json['__meta_metadata__']['__continuable_errors__']\
                        .append('Virtual Rule CError - Unable to evaluate %s - %s' % (field_name, str(error)))
                    return False

        if field_name not in self.json:
            self._set_default_value(json_id, field_name)

        return True

    def _evaluate_decorators(self, rule):
        """
        Evaluate all the 'decorators' related with the current rule.

        :return: False if a dependency could not be resolved or the
            ``only_if`` condition does not hold, True otherwise.
        """
        if rule['parse_first']:
            # Explicit loop: a bare `map()` is lazy on Python 3 and would
            # never call `_unpack_rule`.
            for json_id in try_to_eval(rule['parse_first']):
                self._unpack_rule(json_id)

        if rule['depends_on']:
            for key in try_to_eval(rule['depends_on']):
                if key in self.json:
                    continue
                main_key = SmartDict.main_key_pattern.sub('', key)
                if not self._unpack_rule(main_key):
                    return False

        if rule['only_if'] and not all(try_to_eval(rule['only_if'], self.functions, self=self.json)):
            return False

        return True

    def _find_meta_metadata(self, json_id, field_name, rule_type, rule, rule_def):
        """
        Given one rule, fill up the parallel dictionary with the needed
        meta-metadata.

        :param json_id: key of the field inside the field definitions.
        :param field_name: name of the field inside the json structure.
        :param rule_type: 'creator', 'derived', 'calculated' or 'UNKNOWN'.
        :param rule: the single rule being applied.
        :param rule_def: the whole definition of the field.
        :return: dictionary with the meta-metadata of the field.
        """
        # Side effect: every alias of the field is registered in the json.
        for alias in rule_def.get('aliases', []):
            self.json['__meta_metadata__.__aliases__.%s' % (alias, )] = field_name

        info = {'timestamp': datetime.datetime.now().isoformat()}
        if rule_def.get('persistent_identifier', None) is not None:
            info['pid'] = rule_def['persistent_identifier']
        info['memoize'] = rule.get('memoize', None)
        info['type'] = rule_type

        if rule_type in ('calculated', 'derived'):
            info['function'] = (json_id, 'rules', rule_type, 0, 'value')
        elif rule_type == 'UNKNOWN':
            info['function'] = 'UNKNOWN'
            info['source_tag'] = 'UNKNOWN'
        else:
            info['source_tag'] = rule['source_tag']

        # The value handling code reads 'json_ext' from the field definition;
        # the rule itself is still checked to stay backward compatible.
        if 'json_ext' in rule or 'json_ext' in rule_def:
            info['dumps'] = (json_id, 'json_ext', 'dumps')
            info['loads'] = (json_id, 'json_ext', 'loads')

        return info

    def _set_default_value(self, json_id, field_name):
        """
        Set the default value found inside the schema of the field, if any.

        The default is obtained by calling ``schema['default']`` and, when
        the field defines a json extension, passed through its ``dumps``.
        Failures are recorded as continuable errors. Nothing is returned.
        """
        field_def = self.field_definitions[json_id]
        schema = field_def.get('schema', {}).get(json_id)
        if schema and 'default' in schema:
            try:
                value = schema['default']()
                try:
                    value = field_def['json_ext']['dumps'](value)
                except KeyError:
                    # No json extension for this field: keep the raw default.
                    pass
                self.json.set(field_name, value, extend=True)
            except Exception as error:
                self.json['__meta_metadata__']['__continuable_errors__']\
                    .append('Default Value CError - Unable to set default value for %s - %s' % (field_name, str(error)))

    def _remove_none_values(self, obj):
        """
        Recursively remove, in place, the None values from `obj`.

        Dictionaries lose the keys whose value is None and lists lose their
        None elements; any other kind of object is left untouched.
        """
        if isinstance(obj, dict):
            # Iterate over a snapshot: the dict is modified inside the loop.
            for key, value in list(obj.items()):
                if value is None:
                    del obj[key]
                else:
                    self._remove_none_values(value)
        if isinstance(obj, list):
            # Slice assignment keeps the same list object while dropping
            # every None (removing during iteration would skip elements).
            obj[:] = [element for element in obj if element is not None]
            for element in obj:
                self._remove_none_values(element)

    def _post_process_json(self):
        """
        Responsible of doing any kind of transformation over the json
        structure after it is created, e.g. pruning the json to delete None
        values or singletons. Does nothing in this base class.
        """
        pass
Event Timeline
Log In to Comment