Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90871558
BibIndexAuthorCountTokenizer.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Nov 5, 13:04
Size
2 KB
Mime Type
text/x-python
Expires
Thu, Nov 7, 13:04 (2 d)
Engine
blob
Format
Raw Data
Handle
22150093
Attached To
R3600 invenio-infoscience
BibIndexAuthorCountTokenizer.py
View Options
# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2010, 2011, 2012, 2014, 2015 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""BibIndexAuthorCountTokenizer: counts number of authors for any publication
given by recID. Looks at the tags '100__a' and '700__a', which are
'first author name' and 'additional author name'.
"""
from
invenio.legacy.bibindex.engine_utils
import
get_field_count
from
invenio.modules.indexer.tokenizers.BibIndexMultiFieldTokenizer
import
BibIndexMultiFieldTokenizer
from
invenio.modules.records.api
import
get_record
class BibIndexAuthorCountTokenizer(BibIndexMultiFieldTokenizer):
    """
    Tokenizer producing the number of authors of a publication.

    The tokenizing functions take the recID of a record (not a phrase)
    and return a one-element list holding the author count as a string.
    The count is derived from several tags at once; for more information
    on this kind of tokenizer see the BibIndexMultiFieldTokenizer base
    class.
    """

    def __init__(self, stemming_language=None, remove_stopwords=False,
                 remove_html_markup=False, remove_latex_markup=False):
        """
        Set the MARC tags and the non-MARC field used for counting.

        None of the arguments is used by this constructor; presumably
        they are accepted so that every tokenizer can be built with the
        same call (to be confirmed against the base class).

        @param stemming_language: not used here
        @param remove_stopwords: not used here
        @param remove_html_markup: not used here
        @param remove_latex_markup: not used here
        """
        # Tags holding the first author name and additional author names.
        self.tags = ['100__a', '700__a']
        # Field looked up in the record by tokenize_via_recjson.
        self.nonmarc_tag = 'number_of_authors'

    def tokenize(self, recID):
        """
        Count the authors of a record via get_field_count from
        bibindex.engine_utils.

        @param recID: id of the record
        @return: one-element list with the count converted to a string
        """
        author_count = get_field_count(recID, self.tags)
        return [str(author_count)]

    def tokenize_via_recjson(self, recID):
        """
        Tokenize with use of bibfield: read the author count stored in
        the record under self.nonmarc_tag.

        @param recID: id of the record
        @return: one-element list with the stored value converted to a
            string; '0' when the value is missing or falsy
        """
        record = get_record(recID)
        stored_count = record.get(self.nonmarc_tag)
        if not stored_count:
            # Missing, None or otherwise empty values all count as zero.
            stored_count = 0
        return [str(stored_count)]

    def get_tokenizing_function(self, wordtable_type):
        """
        Return the MARC-based tokenizing function.

        @param wordtable_type: ignored, the same function is returned
            for every value
        """
        return self.tokenize

    def get_nonmarc_tokenizing_function(self, table_type):
        """
        Return the record-based (non-MARC) tokenizing function.

        @param table_type: ignored, the same function is returned
            for every value
        """
        return self.tokenize_via_recjson
Event Timeline
Log In to Comment