Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90639359
api.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 3, 12:02
Size
20 KB
Mime Type
text/x-python
Expires
Tue, Nov 5, 12:02 (2 d)
Engine
blob
Format
Raw Data
Handle
22113193
Attached To
R3600 invenio-infoscience
api.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2009, 2010, 2011, 2013 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Provide API-callable functions for knowledge base management (using kb's).
"""
import
os
import
re
import
json
from
.
import
dblayer
as
bibknowledge_dblayer
from
invenio.base.globals
import
cfg
# Detect which XSLT processor can be imported; get_kbt_items() branches on
# the resulting value:
#   0 = no processor found, 1 = lxml, 2 = libxml2 + libxslt
processor_type = 0
try:
    from lxml import etree
    processor_type = 1
except ImportError:
    # lxml is not installed: fall back to the libxml2/libxslt bindings.
    try:
        import libxml2
        import libxslt
        processor_type = 2
    except ImportError:
        # Neither binding is available; processor_type stays 0.
        pass
def get_kb_mappings(kb_name="", key="", value="", match_type="s"):
    """Fetch left side/right side mappings of the knowledge base kb_name.

    Thin wrapper around the database layer: key and value are forwarded
    as its ``keylike`` and ``valuelike`` filters.

    @param kb_name: the name of the kb
    @param key: include only lines matching this on left side in the results
    @param value: include only lines matching this on right side in the results
    @param match_type: s = substring match, e = exact match
    @return the result of the database layer query (a list of mappings)
    """
    filters = {
        'keylike': key,
        'valuelike': value,
        'match_type': match_type,
    }
    return bibknowledge_dblayer.get_kb_mappings(kb_name, **filters)
def get_kb_mapping(kb_name="", key="", value="", match_type="e", default=""):
    """Return the first mapping matching the query, or default if none match.

    @param kb_name: the name of the kb
    @param key: include only lines matching this on left side in the results
    @param value: include only lines matching this on right side in the results
    @param match_type: s = substring match, e = exact match
    @param default: what to return when the query yields no mapping
    @return a mapping, or default
    """
    found = bibknowledge_dblayer.get_kb_mappings(
        kb_name,
        keylike=key,
        valuelike=value,
        match_type=match_type,
    )
    if len(found) > 0:
        # Several mappings may match; only the first one is handed back.
        return found[0]
    return default
def add_kb_mapping(kb_name, key, value=""):
    """Insert a new key/value mapping into the given kb.

    Delegates directly to the database layer; nothing is returned.

    @param kb_name: the name of the kb where to insert the new value
    @param key: the key of the mapping
    @param value: the value of the mapping
    """
    store_mapping = bibknowledge_dblayer.add_kb_mapping
    store_mapping(kb_name, key, value)
def remove_kb_mapping(kb_name, key):
    """Remove an existing mapping from a kb.

    Delegates directly to the database layer; nothing is returned.

    @param kb_name: the name of the kb holding the mapping
    @param key: the key of the mapping to delete
    """
    drop_mapping = bibknowledge_dblayer.remove_kb_mapping
    drop_mapping(kb_name, key)
def update_kb_mapping(kb_name, old_key, key, value):
    """Replace the mapping stored under old_key with a new key and value.

    When the key itself changes and the new key is already present in the
    kb, nothing is updated and the function returns silently.

    @param kb_name: the name of the kb holding the mapping
    @param old_key: the key of the mapping in the kb
    @param key: the new key of the mapping
    @param value: the new value of the mapping
    """
    key_unchanged = (old_key == key)
    if not key_unchanged and kb_mapping_exists(kb_name, key):
        # Renaming onto a key that already exists: leave everything as is.
        return
    # Either a pure value change, or a rename onto a free key.
    bibknowledge_dblayer.update_kb_mapping(kb_name, old_key, key, value)
def get_kb_mappings_json(kb_name="", key="", value="", match_type="s",
                         limit=None):
    """Return mappings of kb kb_name as a JSON list of label/value objects.

    For every mapping the label is its value (falling back to the key when
    the value is empty) and the value is its key (falling back to the
    value when the key is empty).

    @param kb_name: the name of the kb
    @param key: include only lines matching this on left side in the results
    @param value: include only lines matching this on right side in the results
    @param match_type: s = substring match, e = exact match
    @param limit: maximum number of results to return (ALL if set to None)
    @return a JSON-encoded string holding the list of mappings
    """
    mappings = get_kb_mappings(kb_name, key, value, match_type)
    # A slice bound of None means "no bound", so limit=None keeps everything.
    entries = [
        {'label': m['value'] or m['key'],
         'value': m['key'] or m['value']}
        for m in mappings[:limit]
    ]
    return json.dumps(entries)
def get_kb_mappings_embedded_json(kb_name="", key="", value="",
                                  match_type="s", limit=None):
    """Return mappings of kb kb_name as JSON, with the value embedded as JSON.

    Like get_kb_mappings_json, except that the 'value' entry is itself
    parsed with json.loads before being embedded in the result, so it must
    be a valid JSON string (json.loads raises otherwise).

    NOTE(review): the original documentation says the right side is the
    one treated as JSON, but the string that gets parsed is the key,
    falling back to the value only when the key is empty -- confirm which
    is intended.

    @param kb_name: the name of the kb
    @param key: include only lines matching this on left side in the results
    @param value: include only lines matching this on right side in the results
    @param match_type: s = substring match, e = exact match
    @param limit: maximum number of results to return (ALL if set to None)
    @return a JSON-encoded string holding the list of mappings
    """
    mappings = get_kb_mappings(kb_name, key, value, match_type)
    # A slice bound of None means "no bound", so limit=None keeps everything.
    entries = [
        {'label': m['value'] or m['key'],
         'value': json.loads(m['key'] or m['value'])}
        for m in mappings[:limit]
    ]
    return json.dumps(entries)
def kb_exists(kb_name):
    """Tell whether a kb with the given name exists.

    @param kb_name: the name of the knowledge base
    @return the database layer's answer (True if the kb exists)
    """
    exists = bibknowledge_dblayer.kb_exists(kb_name)
    return exists
def get_kb_name(kb_id):
    """Look up the name of the kb that has the given id.

    @param kb_id: the id of the knowledge base
    @return the name as reported by the database layer
    """
    found_name = bibknowledge_dblayer.get_kb_name(kb_id)
    return found_name
def update_kb_attributes(kb_name, new_name, new_description=''):
    """Give kb kb_name a new name and, optionally, a new description.

    Delegates to the database layer's update_kb; nothing is returned.

    @param kb_name: the name of the kb to update
    @param new_name: the new name for the kb
    @param new_description: the new description for the kb
    """
    apply_update = bibknowledge_dblayer.update_kb
    apply_update(kb_name, new_name, new_description)
def add_kb(kb_name="Untitled", kb_type=None):
    """Create a new kb in the database and return its id.

    If a kb called kb_name already exists, " 1", " 2", ... is appended to
    the requested name until an unused name is found (e.g. 'Untitled 1').
    The kb is created with an empty description.

    @param kb_name: the desired name of the kb
    @param kb_type: the type of the kb, incl 'taxonomy' and 'dynamic'.
                    None for typical (leftside-rightside).
    @return the id of the newly created kb
    """
    candidate = kb_name
    suffix = 0
    while bibknowledge_dblayer.kb_exists(candidate):
        # Name taken: try the next numbered variant.
        suffix += 1
        candidate = kb_name + " " + str(suffix)
    return bibknowledge_dblayer.add_kb(candidate, "", kb_type)
def add_dynamic_kb(kbname, tag, collection="", searchwith=""):
    """Convenience helper: create a dynamic kb and store its configuration.

    @param kbname: the desired name of the kb (add_kb may append a number)
    @param tag: first configuration value passed to save_kb_dyn_config
    @param collection: third configuration value passed to save_kb_dyn_config
    @param searchwith: second configuration value passed to save_kb_dyn_config
    @return the id of the newly created kb
    """
    new_id = add_kb(kb_name=kbname, kb_type='dynamic')
    # Note the argument order expected by the database layer:
    # searchwith comes before collection.
    bibknowledge_dblayer.save_kb_dyn_config(new_id, tag, searchwith,
                                            collection)
    return new_id
def kb_mapping_exists(kb_name, key):
    """Tell whether kb kb_name holds a mapping for the given key.

    @param kb_name: knowledge base name
    @param key: left side (mapFrom)
    @return the database layer's answer
    """
    present = bibknowledge_dblayer.kb_mapping_exists(kb_name, key)
    return present
def delete_kb(kb_name):
    """Remove the given kb from the database.

    Delegates directly to the database layer; nothing is returned.

    @param kb_name: knowledge base name
    """
    drop_kb = bibknowledge_dblayer.delete_kb
    drop_kb(kb_name)
def get_kb_id(kb_name):
    """Look up the id of the kb that has the given name.

    @param kb_name: knowledge base name
    @return the id as reported by the database layer
    """
    found_id = bibknowledge_dblayer.get_kb_id(kb_name)
    return found_id
# Knowledge Bases Dependencies
##
def get_elements_that_use_kb(name, elements_path=None):
    """
    This routine is obsolete.

    Returns a list of format elements whose source code calls the given kb
    through bfo.kb("<name>", ...):

    [ {'filename':"filename_1.py"
       'name': "a name"
      },
      ...
    ]

    Only files ending in ".py" are scanned. The element name is the
    lowercased file name without its extension and without a leading
    "bfe_". Returns elements sorted by name.

    @param name: the name of the kb to look for (matched literally,
                 ignoring case)
    @param elements_path: directory to scan; defaults to
                          cfg['CFG_BIBFORMAT_ELEMENTS_PATH'] when None
    @return a list of {'filename': ..., 'name': ...} dictionaries
    """
    if elements_path is None:
        elements_path = cfg['CFG_BIBFORMAT_ELEMENTS_PATH']

    # Compiled once, outside the loop. The kb name is escaped so that
    # spaces (otherwise dropped by re.VERBOSE) and regex metacharacters in
    # it are matched literally.
    kb_pattern = re.compile(r'''
        (bfo.kb)\s*     #Function call
        \(\s*           #Opening parenthesis
        [\'"]+          #Single or double quote
        (?P<kb>%s)      #kb
        [\'"]+\s*       #Single or double quote
        ,               #comma
        ''' % re.escape(name), re.VERBOSE | re.MULTILINE | re.IGNORECASE)

    format_elements = {}
    for filename in os.listdir(elements_path):
        if not filename.endswith(".py"):
            continue
        with open(os.path.join(elements_path, filename), 'r') as formatf:
            code = formatf.read()
        if kb_pattern.search(code) is None:
            continue
        # Use a separate variable here: reusing 'name' would change the kb
        # being searched for in all the remaining files.
        element_name = "".join(filename.split(".")[:-1]).lower()
        if element_name.startswith("bfe_"):
            element_name = element_name[4:]
        format_elements[element_name] = {'filename': filename,
                                         'name': element_name}

    return [format_elements[key] for key in sorted(format_elements)]
###kb functions for export
def get_kbs_info(kbtype="", searchkbname=""):
    """Convenience wrapper around the database layer's get_kbs_info.

    @param kbtype: type of kb -- get only kb's of this type
    @param searchkbname: get only kb's where this string appears in the name
    @return whatever the database layer returns for the query
    """
    kbs = bibknowledge_dblayer.get_kbs_info(kbtype, searchkbname)
    return kbs
def get_kba_values(kb_name, searchname="", searchtype="s"):
    """Return the values of an "authority file" type kb (values only).

    @param kb_name: name of kb
    @param searchname: get these values, according to searchtype
    @param searchtype: s=substring, e=exact
    @return the values as returned by the database layer
    """
    values = bibknowledge_dblayer.get_kba_values(kb_name, searchname,
                                                 searchtype)
    return values
def get_kbr_keys(kb_name, searchkey="", searchvalue="", searchtype='s'):
    """Return the keys of the mappings in a kb that match the search.

    @param kb_name: the name of the knowledge base
    @param searchkey: search using this key
    @param searchvalue: search using this value
    @param searchtype: s = substring, e=exact
    @return the keys as returned by the database layer
    """
    matching_keys = bibknowledge_dblayer.get_kbr_keys(
        kb_name, searchkey, searchvalue, searchtype)
    return matching_keys
def get_kbr_values(kb_name, searchkey="", searchvalue="", searchtype='s',
                   use_memoise=False):
    """Return the values of a key-value mapping kb that match the search.

    Depending on use_memoise, the lookup goes through the database
    layer's get_kbr_values_memoised or its plain get_kbr_values; both
    receive the same five arguments, use_memoise included.

    @param kb_name: the name of the knowledge base
    @param searchkey: search using this key
    @param searchvalue: search using this value
    @param searchtype: s=substring; e=exact
    @param use_memoise: can we memoise while doing lookups?
    @type use_memoise: bool
    @return the values as returned by the database layer
    """
    if use_memoise:
        lookup = bibknowledge_dblayer.get_kbr_values_memoised
    else:
        lookup = bibknowledge_dblayer.get_kbr_values
    return lookup(kb_name, searchkey, searchvalue, searchtype, use_memoise)
def get_kbr_items(kb_name, searchkey="", searchvalue="", searchtype='s'):
    """Return the mappings of a kb that match the search, as dictionaries.

    @param kb_name: the name of the knowledge base
    @param searchkey: search using this key
    @param searchvalue: search using this value
    @param searchtype: s = substring, e=exact
    @return a list of dictionaries [{'key'=>x, 'value'=>y},..]
    """
    items = bibknowledge_dblayer.get_kbr_items(kb_name, searchkey,
                                               searchvalue, searchtype)
    return items
def get_kbd_values(kbname, searchwith=""):
    """Return a list of values by searching a dynamic kb.

    The kb's dynamic configuration supplies a field, an optional search
    expression and an optional collection. The records to inspect are
    selected as follows:
      - expression and searchwith given: every '%' in the expression is
        replaced by searchwith; if the expression contains no '%', the
        query is "<expression> and <searchwith>";
      - only expression given: the expression is used as is;
      - no expression: the field is searched for searchwith, or for
        "/.*/" when searchwith is empty, so that only records having
        the field are returned.
    The most popular values of the field among the selected records are
    then returned.

    An empty list is returned when the kb does not exist, is not of type
    'd' (dynamic), has no configuration or no configured field, or when
    the search finds no record.

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with
    @return a list of field values
    """
    # 'import invenio.legacy.search_engine' would bind only the name
    # 'invenio'; the code below needs the name 'search_engine' itself.
    from invenio.legacy import search_engine

    # First check that the kb in question exists and is dynamic.
    kbid = bibknowledge_dblayer.get_kb_id(kbname)
    if not kbid:
        return []
    # A missing (falsy) type also fails this comparison.
    if bibknowledge_dblayer.get_kb_type(kbid) != 'd':
        return []

    # Get the configuration so that we see what the field is.
    confdict = bibknowledge_dblayer.get_kb_dyn_config(kbid)
    if not confdict or 'field' not in confdict:
        return []
    field = confdict['field']
    # Both keys are optional: treat a missing one like an empty string.
    expression = confdict.get('expression', "")
    collection = confdict.get('collection', "")

    if expression:
        if searchwith:
            if '%' in expression:
                expression = expression.replace("%", searchwith)
            else:
                # No '%' placeholder: just make a combination.
                expression = expression + " and " + searchwith
        reclist = search_engine.perform_request_search(p=expression,
                                                       cc=collection)
    else:
        # Make a fake expression so that only records that have this
        # field will be returned.
        fake_exp = searchwith or "/.*/"
        reclist = search_engine.perform_request_search(f=field, p=fake_exp,
                                                       cc=collection)

    if not reclist:
        return []  # in case nothing worked
    popular = search_engine.get_most_popular_field_values(reclist, field)
    return [val for (val, dummy) in popular]
def get_kbd_values_json(kbname, searchwith=""):
    """Return the values found in a dynamic kb as a JSON-formatted string.

    This IS probably the method you want. It JSON-encodes the result of
    get_kbd_values.

    @param kbname: name of the knowledge base
    @param searchwith: a term to search with
    @return a JSON-encoded string
    """
    return json.dumps(get_kbd_values(kbname, searchwith))
def get_kbd_values_for_bibedit(tag, collection="", searchwith="",
                               expression=""):
    """
    Dynamically create a dynamic KB for a specific search; search; then
    destroy it.

    This probably isn't the method you want.

    Example1: tag=100__a : return values of 100__a
    Example2: tag=100__a, searchwith=Jill: return values of 100__a that
              match with Jill
    Example3: tag=100__a, searchwith=Ellis, expression="700__a:*%*":
              return values of 100__a for which Ellis matches some 700__a

    Note: the performance of this function is ok compared to a plain
    perform_request_search / get most popular fields -pair. The overhead
    is about 5% with large record sets; the lookups are the expensive part.

    @param tag: the tag like 100__a
    @param collection: collection id
    @param searchwith: the string to search. If empty, match all.
    @param expression: the search expression for perform_request_search; if
                       present, '%' is substituted with /searchwith/. If
                       absent, /searchwith/ is searched for in /tag/.
    @return the values found by get_kbd_values on the temporary kb
    """
    dkbname = "tmp_dynamic_" + tag + '_' + expression
    kb_id = add_kb(kb_name=dkbname, kb_type='dynamic')
    # Get the kb name since it may be catenated by a number
    # in case there are concurrent calls.
    kb_name = get_kb_name(kb_id)
    try:
        bibknowledge_dblayer.save_kb_dyn_config(kb_id, tag, expression,
                                                collection)
        # Now, get stuff.
        return get_kbd_values(kb_name, searchwith)
    finally:
        # The tmp dyn kb is now useless; delete it even when configuring
        # or searching failed, so that no temporary kb is left behind.
        delete_kb(kb_name)
def get_kbt_items(taxonomyfilename, templatefilename, searchwith=""):
    """
    Get items from taxonomy file using a templatefile. If searchwith is
    defined, return only items that match with it.

    The taxonomy file is transformed with the XSLT template using
    whichever processor was detected at import time (lxml, or libxml2 +
    libxslt). The transformation output is split into lines; empty lines
    are dropped. If no processor is available, an empty list is returned.

    @param taxonomyfilename: full path+name of the RDF file
    @param templatefilename: full path+name of the XSLT file
    @param searchwith: a term to search with
    @return a list of output lines (those containing searchwith, if given)
    """
    if processor_type == 1:
        # lxml. Both arguments are file paths, so they have to be read
        # with etree.parse(); etree.XML() would try to parse the path
        # string itself as XML markup.
        doc = etree.parse(taxonomyfilename)
        style = etree.XSLT(etree.parse(templatefilename))
        strres = str(style(doc))
    elif processor_type == 2:
        # libxml2 & libxslt
        styledoc = libxml2.parseFile(templatefilename)
        style = libxslt.parseStylesheetDoc(styledoc)
        doc = libxml2.parseFile(taxonomyfilename)
        result = style.applyStylesheet(doc, None)
        strres = style.saveResultToString(result)
        # These bindings need their objects released explicitly.
        style.freeStylesheet()
        doc.freeDoc()
        result.freeDoc()
    else:
        # no xml parser found
        strres = ""

    if not strres:
        return []
    lines = strres.split("\n")
    if searchwith:
        return [line for line in lines if searchwith in line]
    return [line for line in lines if line]
def get_kbt_items_for_bibedit(kbtname, tag="", searchwith=""):
    """
    A simplified, customized version of the function get_kbt_items.

    Traverses the RDF document stored for the taxonomy kb
    (<CFG_WEBDIR>/kbfiles/<kb id>.rdf) with a built-in stylesheet that
    emits a <myout> element holding the whitespace-normalized content of
    every element whose local name equals tag.
    If searchwith is defined, only the contents containing it are returned.

    Warning! In order to make this faster, the matching field values
    cannot be multi-line!

    An empty list is returned when the kb or its RDF file does not exist.

    @param kbtname: name of the taxonomy kb
    @param tag: name of the tag whose content is collected
    @param searchwith: a term to search with
    @return a list of strings
    """
    from lxml import etree

    # Get the actual file based on the kbt name.
    kb_id = get_kb_id(kbtname)
    if not kb_id:
        return []
    # Get the rdf file..
    rdfname = cfg['CFG_WEBDIR'] + "/kbfiles/" + str(kb_id) + ".rdf"
    if not os.path.exists(rdfname):
        return []

    # Parse the doc with static xslt.
    # NOTE(review): tag is concatenated into the stylesheet unescaped; a
    # tag containing a quote or markup would break or alter the XSLT.
    # NOTE(review): whitespace inside this literal is significant (the
    # <xsl:text> element is copied to the output), so it is kept exactly
    # as found -- confirm against the canonical source before reformatting.
    styledoc = etree.XML("""
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<xsl:output method="xml" standalone="yes" omit-xml-declaration="yes" indent="no"/>
<xsl:template match="rdf:RDF">
<foo><!--just having some tag here speeds up output by 10x-->
<xsl:apply-templates />
</foo>
</xsl:template>
<xsl:template match="*">
<!--hi><xsl:value-of select="local-name()"/></hi-->
<xsl:if test="local-name()='""" + tag + """'">
<myout><xsl:value-of select="normalize-space(.)"/></myout>
</xsl:if>
<!--traverse down in tree!-->
<xsl:text>
</xsl:text>
<xsl:apply-templates />
</xsl:template>
</xsl:stylesheet>
""")
    style = etree.XSLT(styledoc)
    # Close the RDF file as soon as it has been parsed.
    with open(rdfname, 'r') as rdffile:
        doc = etree.parse(rdffile)
    strres = str(style(doc))

    ritems = []
    if len(strres) == 0:
        return []
    for line in strres.split("\n"):
        # Take only those with myout..
        if line.count("<myout>") == 0:
            continue
        # Remove the myout tag..
        # NOTE(review): these fixed offsets assume a fixed-width prefix
        # before the content and a trailing "</myout>" -- confirm they
        # match the actual output layout.
        line = line[9:]
        line = line[:-8]
        if searchwith and line.count(searchwith) == 0:
            continue
        ritems.append(line)
    return ritems
# Running this module as a script deliberately does nothing; it is meant to
# be imported.
if __name__ == "__main__":
    pass
Event Timeline
Log In to Comment