Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92240587
BibIndexDOITokenizer.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Nov 18, 16:14
Size
1 KB
Mime Type
text/x-python
Expires
Wed, Nov 20, 16:14 (2 d)
Engine
blob
Format
Raw Data
Handle
22402129
Attached To
R3600 invenio-infoscience
BibIndexDOITokenizer.py
View Options
# -*- coding:utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2013 CERN.
#
# Invenio is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# Invenio is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Invenio; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
from
invenio.modules.indexer.tokenizers.BibIndexFilteringTokenizer
import
BibIndexFilteringTokenizer
class BibIndexDOITokenizer(BibIndexFilteringTokenizer):
    """
    Filtering tokenizer for DOI values.

    Tokenizes the DOI tag (0247_a) only if the "0247_2" tag is present
    and its value equals "DOI", and tokenizes the 909C4a tag without
    any constraints.
    """

    def __init__(self, stemming_language=None, remove_stopwords=False,
                 remove_html_markup=False, remove_latex_markup=False):
        """
        Set up the filtering rules used for DOI tokenization.

        The keyword arguments are accepted so the constructor signature
        stays compatible with callers, but none of them is used here.

        @param stemming_language: unused by this tokenizer
        @param remove_stopwords: unused by this tokenizer
        @param remove_html_markup: unused by this tokenizer
        @param remove_latex_markup: unused by this tokenizer
        """
        # Each rule appears to be (tag, subfield code to check, required
        # value); empty strings mean "no constraint" per the class docstring.
        self.rules = (
            ('0247_a', '2', 'DOI'),
            ('909C4a', '', ''),
        )

    def get_tokenizing_function(self, wordtable_type):
        """
        Return the tokenizing function for MARC records.

        @param wordtable_type: ignored; the same function is returned
            for every word table type
        @return: the bound ``tokenize`` method (not defined in this class,
            so presumably inherited from the base tokenizer)
        """
        return self.tokenize

    def tokenize_via_recjson(self, recID):
        """
        Non-MARC version of the tokenize function for DOI.

        Note: with non-MARC records no filtering is needed; the value is
        taken straight from the record because bibfield is used here.

        @param recID: identifier of the record to read
        @return: the record's 'doi' value, or an empty list when the
            record has no 'doi' key
        """
        # get_record was referenced without ever being imported in this
        # module, which raised NameError on the first call. Import it
        # locally so the fix is self-contained.
        # NOTE(review): import path assumed from the Invenio 2.x layout;
        # confirm it matches this code base.
        from invenio.modules.records.api import get_record

        rec = get_record(recID)
        return rec.get('doi', [])

    def get_nonmarc_tokenizing_function(self, table_type):
        """
        Return the tokenizing function for non-MARC records.

        @param table_type: ignored; the same function is returned for
            every table type
        @return: the bound ``tokenize_via_recjson`` method
        """
        return self.tokenize_via_recjson
Event Timeline
Log In to Comment