Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90392776
bibformat_xslt_engine.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Nov 1, 06:39
Size
22 KB
Mime Type
text/x-python
Expires
Sun, Nov 3, 06:39 (2 d)
Engine
blob
Format
Raw Data
Handle
22066304
Attached To
R3600 invenio-infoscience
bibformat_xslt_engine.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2007, 2008, 2009, 2010, 2011 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
bibformat_xslt_engine - Wrapper for an XSLT engine.
Some functions are registered in order to be used in XSL code:
- creation_date(recID)
- modification_date(recID)
- eval_bibformat(recID, template_code)
Dependencies: Need one of the following XSLT processors:
- lxml
- libxml2 & libxslt
- 4suite
Used by: bibformat_engine.py
"""
# Revision marker of this module ("$Id$" is presumably a keyword placeholder
# expanded by the version control system -- TODO confirm).
__revision__ = "$Id$"
import
sys
import
os
from
invenio.config
import
\
CFG_SITE_URL
from
invenio.bibformat_config
import
\
CFG_BIBFORMAT_TEMPLATES_PATH
from
invenio.bibformat_dblayer
import
\
get_creation_date
,
\
get_modification_date
# XML namespace under which the BibFormat functions are made available
# to XSL stylesheets
CFG_BIBFORMAT_FUNCTION_NS = "http://cdsweb.cern.ch/bibformat/fn"

# Pick the first XSLT processor that can be imported. The outcome is
# recorded in 'processor_type':
#   0 : No processor found
#   1 : lxml
#   2 : libxslt
#   3 : 4suite
processor_type = 0

# First choice: lxml
try:
    from lxml import etree
except ImportError:
    pass
else:
    processor_type = 1

# Second choice: libxml2 & libxslt
if not processor_type:
    try:
        import libxml2
        import libxslt
    except ImportError:
        pass
    else:
        processor_type = 2

# Third choice: 4suite
if not processor_type:
    try:
        from Ft.Xml.Xslt import Processor
        from Ft.Xml import InputSource
        from xml.dom import Node
    except ImportError:
        pass
    else:
        processor_type = 3

# Nothing could be imported: warn, but let the import of this module succeed
if not processor_type:
    sys.stderr.write('No XSLT processor could be found.\n'
                     'No output produced.\n')
    #sys.exit(1)
##################################################################
# Support for 'creation_date', 'modification_date' and 'eval_bibformat' functions #
def get_creation_date_lxml(ctx, recID, fmt="%Y-%m-%dT%H:%M:%SZ"):
    """
    lxml extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns record creation date.

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:creation_date(445)"/> where 445 is a recID

    if recID is a string or a number, value is converted to int
    if recID is a node-set (list), its first item is used
    if recID (or the first item) is a node, its text is taken as value

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by lxml
    @param recID: record ID
    @param fmt: format of the returned date
    @return: creation date of X{recID}
    @rtype: string
    """
    try:
        # A node-set is received as a list: only its first item matters
        if isinstance(recID, list):
            recID = recID[0]
        if isinstance(recID, (int, long)):
            recID_int = recID
        elif isinstance(recID, (basestring, float)):
            # lxml passes XPath numbers (such as the literal 445 of the
            # example above) as floats, and strings as str or unicode
            recID_int = int(recID)
        else:
            # Element: use its text content
            recID_int = int(recID.text)

        if isinstance(fmt, list):
            fmt = fmt[0]
        if isinstance(fmt, basestring):
            fmt_str = fmt
        else:
            fmt_str = fmt.text

        return get_creation_date(recID_int, fmt_str)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def get_creation_date_libxslt(ctx, recID, fmt="%Y-%m-%dT%H:%M:%SZ"):
    """
    libxslt extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns record creation date.

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:creation_date(445)"/> where 445 is a recID

    if recID is a string or a number, value is converted to int
    if recID is a node-set, the content of the first child (text node)
    of its first node is converted to int

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by libxslt
    @param recID: record ID
    @param fmt: format of the returned date
    @return: creation date of X{recID}
    @rtype: string
    """
    try:
        if isinstance(recID, (int, long)):
            recID_int = recID
        elif isinstance(recID, (basestring, float)):
            # Numeric XPath arguments may arrive as floats
            recID_int = int(recID)
        else:
            # Node-set: wrap the first raw node to read its text content
            recID_int = int(
                libxml2.xmlNode(_obj=recID[0]).children.content)

        if isinstance(fmt, basestring):
            fmt_str = fmt
        else:
            # The format has to be read from the 'fmt' node-set itself
            # (reading it from 'recID' would return the record ID)
            fmt_str = libxml2.xmlNode(_obj=fmt[0]).children.content

        return get_creation_date(recID_int, fmt_str)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def get_creation_date_4suite(ctx, recID, fmt="%Y-%m-%dT%H:%M:%SZ"):
    """
    4suite extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns record creation date.

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:creation_date(445)"/>

    if recID is a string or a number, value is converted to int
    if recID is a node-set, the value of the first child (text node)
    of its first node is converted to int; '' is returned when that
    value is None

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by 4suite
    @param recID: record ID
    @param fmt: format of the returned date
    @return: creation date of X{recID}
    @rtype: string
    """
    try:
        if isinstance(recID, (int, long, float)):
            # Numbers have no len(): handle them before the node-set test
            recID_int = int(recID)
        elif len(recID) > 0 and isinstance(recID[0], Node):
            node_value = recID[0].firstChild.nodeValue
            if node_value is None:
                return ''
            recID_int = int(node_value)
        else:
            recID_int = int(recID)

        if len(fmt) > 0 and isinstance(fmt[0], Node):
            fmt_str = fmt[0].firstChild.nodeValue
            if fmt_str is None:
                # Empty node: fall back on the default format
                fmt_str = "%Y-%m-%dT%H:%M:%SZ"
        else:
            fmt_str = str(fmt)

        return get_creation_date(recID_int, fmt_str)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def get_modification_date_lxml(ctx, recID, fmt="%Y-%m-%dT%H:%M:%SZ"):
    """
    lxml extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns record modification date.

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:modification_date(445)"/> where 445 is a recID

    if recID is a string or a number, value is converted to int
    if recID is a node-set (list), its first item is used
    if recID (or the first item) is a node, its text is taken as value

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by lxml
    @param recID: record ID
    @param fmt: format of the returned date
    @return: modification date of X{recID}
    @rtype: string
    """
    try:
        # A node-set is received as a list: only its first item matters
        if isinstance(recID, list):
            recID = recID[0]
        if isinstance(recID, (int, long)):
            recID_int = recID
        elif isinstance(recID, (basestring, float)):
            # lxml passes XPath numbers (such as the literal 445 of the
            # example above) as floats, and strings as str or unicode
            recID_int = int(recID)
        else:
            # Element: use its text content
            recID_int = int(recID.text)

        if isinstance(fmt, list):
            fmt = fmt[0]
        if isinstance(fmt, basestring):
            fmt_str = fmt
        else:
            fmt_str = fmt.text

        return get_modification_date(recID_int, fmt_str)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def get_modification_date_libxslt(ctx, recID, fmt="%Y-%m-%dT%H:%M:%SZ"):
    """
    libxslt extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns record modification date.

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:modification_date(445)"/> where 445 is a recID

    if recID is a string or a number, value is converted to int
    if recID is a node-set, the content of the first child (text node)
    of its first node is converted to int

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by libxslt
    @param recID: record ID
    @param fmt: format of the returned date
    @return: modification date of X{recID}
    @rtype: string
    """
    try:
        if isinstance(recID, (int, long)):
            recID_int = recID
        elif isinstance(recID, (basestring, float)):
            # Numeric XPath arguments may arrive as floats
            recID_int = int(recID)
        else:
            # Node-set: wrap the first raw node to read its text content
            recID_int = int(
                libxml2.xmlNode(_obj=recID[0]).children.content)

        if isinstance(fmt, basestring):
            fmt_str = fmt
        else:
            # The format has to be read from the 'fmt' node-set itself
            # (reading it from 'recID' would return the record ID)
            fmt_str = libxml2.xmlNode(_obj=fmt[0]).children.content

        return get_modification_date(recID_int, fmt_str)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def get_modification_date_4suite(ctx, recID, fmt="%Y-%m-%dT%H:%M:%SZ"):
    """
    4suite extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns record modification date.

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:modification_date(445)"/>

    if recID is a string or a number, value is converted to int
    if recID is a node-set, the value of the first child (text node)
    of its first node is converted to int; '' is returned when that
    value is None

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by 4suite
    @param recID: record ID
    @param fmt: format of the returned date
    @return: modification date of X{recID}
    @rtype: string
    """
    try:
        if isinstance(recID, (int, long, float)):
            # Numbers have no len(): handle them before the node-set test
            recID_int = int(recID)
        elif len(recID) > 0 and isinstance(recID[0], Node):
            node_value = recID[0].firstChild.nodeValue
            if node_value is None:
                return ''
            recID_int = int(node_value)
        else:
            # Convert the argument itself ('recID_int' is not bound yet
            # at this point)
            recID_int = int(recID)

        if len(fmt) > 0 and isinstance(fmt[0], Node):
            fmt_str = fmt[0].firstChild.nodeValue
            if fmt_str is None:
                # Empty node: fall back on the default format
                fmt_str = "%Y-%m-%dT%H:%M:%SZ"
        else:
            fmt_str = str(fmt)

        return get_modification_date(recID_int, fmt_str)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def eval_bibformat_lxml(ctx, recID, template_code):
    """
    lxml extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns the evaluation of the given piece of format template

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:eval_bibformat(marc:controlfield[@tag='001'],'&lt;BFE_SERVER_INFO var=&quot;recurl&quot;>')" />

    if recID is a string or a number, value is converted to int
    if recID is a node-set (list), its first item is used
    if recID (or the first item) is a node, its text is taken as value

    template_code is evaluated as a format template piece of code. '<'
    and '"' need to be escaped with '&lt;' and '&quot;'

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by lxml
    @param recID: record ID
    @param template_code: the code calling a BFE_ as it would be use in format template
    @return: the evalued call to a format template (usually a call to a format element)
    @rtype: string
    """
    # Imported at call time rather than at module level
    from invenio.bibformat_engine import \
         format_with_format_template, \
         BibFormatObject
    try:
        # A node-set is received as a list: only its first item matters
        if isinstance(recID, list):
            recID = recID[0]
        if isinstance(recID, (int, long)):
            recID_int = recID
        elif isinstance(recID, (basestring, float)):
            # lxml passes XPath numbers as floats, and strings as str or
            # unicode
            recID_int = int(recID)
        else:
            # Element: use its text content
            recID_int = int(recID.text)

        bfo = BibFormatObject(recID_int)
        return format_with_format_template(
            None, bfo, verbose=0, format_template_code=template_code)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def eval_bibformat_libxslt(ctx, recID, template_code):
    """
    libxslt extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns the evaluation of the given piece of format template

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:eval_bibformat(marc:controlfield[@tag='001'],'&lt;BFE_SERVER_INFO var=&quot;recurl&quot;>')" />

    if recID is a string or a number, value is converted to int
    if recID is a node-set, the content of the first child (text node)
    of its first node is converted to int

    template_code is evaluated as a format template piece of code. '<'
    and '"' need to be escaped with '&lt;' and '&quot;'

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by libxslt
    @param recID: record ID
    @param template_code: the code calling a BFE_ as it would be use in format template
    @return: the evalued call to a format template (usually a call to a format element)
    @rtype: string
    """
    # Imported at call time rather than at module level
    from invenio.bibformat_engine import \
         format_with_format_template, \
         BibFormatObject
    try:
        if isinstance(recID, (int, long)):
            recID_int = recID
        elif isinstance(recID, (basestring, float)):
            # Numeric XPath arguments may arrive as floats
            recID_int = int(recID)
        else:
            # Node-set: wrap the first raw node to read its text content
            recID_int = int(
                libxml2.xmlNode(_obj=recID[0]).children.content)

        bfo = BibFormatObject(recID_int)
        return format_with_format_template(
            None, bfo, verbose=0, format_template_code=template_code)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
def eval_bibformat_4suite(ctx, recID, template_code):
    """
    4suite extension function:
    Bridge between BibFormat and XSL stylesheets.
    Returns the evaluation of the given piece of format template

    Can be used in that way in XSL stylesheet
    (provided xmlns:fn="http://cdsweb.cern.ch/bibformat/fn" has been declared):
    <xsl:value-of select="fn:eval_bibformat(marc:controlfield[@tag='001'],'&lt;BFE_SERVER_INFO var=&quot;recurl&quot;>')" />

    if recID is a string or a number, value is converted to int
    if recID is a node-set, the value of the first child (text node)
    of its first node is converted to int; '' is returned when that
    value is None

    template_code is evaluated as a format template piece of code. '<'
    and '"' need to be escaped with '&lt;' and '&quot;'

    Any error is reported on stderr and an empty string is returned.

    @param ctx: context as passed by 4suite
    @param recID: record ID
    @param template_code: the code calling a BFE_ as it would be use in format template
    @return: the evalued call to a format template (usually a call to a format element)
    @rtype: string
    """
    # Imported at call time rather than at module level
    from invenio.bibformat_engine import \
         format_with_format_template, \
         BibFormatObject
    try:
        if isinstance(recID, (int, long, float)):
            # Numbers have no len(): handle them before the node-set test
            recID_int = int(recID)
        elif len(recID) > 0 and isinstance(recID[0], Node):
            node_value = recID[0].firstChild.nodeValue
            if node_value is None:
                return ''
            recID_int = int(node_value)
        else:
            # Convert the argument itself ('recID_int' is not bound yet
            # at this point)
            recID_int = int(recID)

        bfo = BibFormatObject(recID_int)
        return format_with_format_template(
            None, bfo, verbose=0, format_template_code=template_code)
    except Exception:
        # sys.exc_info() keeps this handler valid for every Python version
        err = sys.exc_info()[1]
        sys.stderr.write("Error during formatting function evaluation: " +
                         str(err) +
                         '\n')
        return ''
# End of XSL extension functions #
##################################################################
def format(xmltext, template_filename=None, template_source=None):
    """
    Processes an XML text according to a template, and returns the result.

    The template can be given either by name (or by path) or by source.
    If source is given, name is ignored.
    bibformat_xslt_engine will look for template_filename in standard directories
    for templates. If not found, template_filename will be assumed to be a path to
    a template. If none can be found, return None.

    Problems are reported on stderr. With lxml, a failure while parsing
    or transforming makes the function return an empty string.

    @param xmltext: The string representation of the XML to process
    @param template_filename: The name of the template to use for the processing
    @param template_source: The configuration describing the processing.
    @return: the transformed XML text, or None when no template could be
             loaded.
    """
    if processor_type == 0:
        # No XSLT processor found
        sys.stderr.write('No XSLT processor could be found.')
        #sys.exit(1)

    # Retrieve template and read it
    if template_source:
        template = template_source
    elif template_filename:
        # Look in the standard templates directory first, then consider
        # the name as a path
        path_to_templates = (CFG_BIBFORMAT_TEMPLATES_PATH + os.sep +
                             template_filename)
        if os.path.exists(path_to_templates):
            template_path = path_to_templates
        elif os.path.exists(template_filename):
            template_path = template_filename
        else:
            sys.stderr.write(template_filename + ' does not exist.')
            return None
        try:
            template_file = open(template_path)
            try:
                template = template_file.read()
            finally:
                # Do not leave the file handle to the garbage collector
                template_file.close()
        except IOError:
            sys.stderr.write(template_filename + ' could not be read.')
            return None
    else:
        # template_filename may be None here, so it must not be
        # concatenated to the message
        sys.stderr.write('template_filename was not given.')
        return None

    # Some massaging of the input to avoid the default namespace issue
    # in XPath. More elegant solution might be found though.
    xmltext = xmltext.replace('xmlns="http://www.loc.gov/MARC21/slim"', '')

    # For older MARCXML records stored in bibfmt with empty indicators
    xmltext = xmltext.replace('ind1=""', 'ind1=" "')
    xmltext = xmltext.replace('ind2=""', 'ind2=" "')

    result = ""
    if processor_type == 1:
        # lxml

        # Parse the record
        try:
            xml = etree.XML(xmltext)
        except etree.XMLSyntaxError:
            e = sys.exc_info()[1]
            error = 'The XML code given is invalid. [%s]' % (e,)
            sys.stderr.write(error)
            return result
        except Exception:
            error = 'Failed to process the XML code.'
            sys.stderr.write(error)
            return result

        # Parse the stylesheet
        try:
            xsl = etree.XML(template)
        except etree.XMLSyntaxError:
            e = sys.exc_info()[1]
            error = 'The XSL code given is invalid. [%s]' % (e,)
            sys.stderr.write(error)
            return result
        except Exception:
            error = 'Failed to process the XSL code.'
            sys.stderr.write(error)
            return result

        # Register BibFormat functions for use in XSL
        try:
            fns = etree.FunctionNamespace(CFG_BIBFORMAT_FUNCTION_NS)
            fns["creation_date"] = get_creation_date_lxml
            fns["modification_date"] = get_modification_date_lxml
            fns["eval_bibformat"] = eval_bibformat_lxml
        except etree.NamespaceRegistryError:
            e = sys.exc_info()[1]
            error = 'Failed registering the XPath extension function. [%s]' % (e,)
            sys.stderr.write(error)
            return result

        # Compile the stylesheet
        try:
            xslt = etree.XSLT(xsl)
        except etree.XSLTParseError:
            e = sys.exc_info()[1]
            error = 'The XSL code given is invalid. [%s]' % (e,)
            sys.stderr.write(error)
            return result
        except Exception:
            error = 'Failed to process the XSL code.'
            sys.stderr.write(error)
            return result

        # Transform
        try:
            temporary_result = xslt(xml)
        except Exception:
            error = 'Failed to perform the XSL transformation.'
            sys.stderr.write(error)
            return result

        result = str(temporary_result)

        # Housekeeping
        del temporary_result
        del xslt
        del xsl
        del xml

    elif processor_type == 2:
        # libxml2 & libxslt

        # Register BibFormat functions for use in XSL
        libxslt.registerExtModuleFunction("creation_date",
                                          CFG_BIBFORMAT_FUNCTION_NS,
                                          get_creation_date_libxslt)
        libxslt.registerExtModuleFunction("modification_date",
                                          CFG_BIBFORMAT_FUNCTION_NS,
                                          get_modification_date_libxslt)
        libxslt.registerExtModuleFunction("eval_bibformat",
                                          CFG_BIBFORMAT_FUNCTION_NS,
                                          eval_bibformat_libxslt)

        # Load template and source
        template_xml = libxml2.parseDoc(template)
        processor = libxslt.parseStylesheetDoc(template_xml)
        source = libxml2.parseDoc(xmltext)

        # Transform
        result_object = processor.applyStylesheet(source, None)
        try:
            result = processor.saveResultToString(result_object)
        except SystemError:
            # Catch an exception thrown when result is empty,
            # due to a bug in libxslt
            result = ''

        # Deallocate
        processor.freeStylesheet()
        source.freeDoc()
        result_object.freeDoc()

    elif processor_type == 3:
        # 4suite

        # Init
        processor = Processor.Processor()

        # Register BibFormat functions for use in XSL
        processor.registerExtensionFunction(CFG_BIBFORMAT_FUNCTION_NS,
                                            "creation_date",
                                            get_creation_date_4suite)
        processor.registerExtensionFunction(CFG_BIBFORMAT_FUNCTION_NS,
                                            "modification_date",
                                            get_modification_date_4suite)
        processor.registerExtensionFunction(CFG_BIBFORMAT_FUNCTION_NS,
                                            "eval_bibformat",
                                            eval_bibformat_4suite)

        # Load template and source
        transform = InputSource.DefaultFactory.fromString(template,
                                                          uri=CFG_SITE_URL)
        source = InputSource.DefaultFactory.fromString(xmltext,
                                                       uri=CFG_SITE_URL)
        processor.appendStylesheet(transform)

        # Transform
        result = processor.run(source)
    else:
        sys.stderr.write("No XSLT processor could be found")

    return result
# Nothing is done when this module is run directly as a script.
if __name__ == "__main__":
    pass
Event Timeline
Log In to Comment