Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92124614
bfe_bibtex.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 17, 14:03
Size
15 KB
Mime Type
text/x-python
Expires
Tue, Nov 19, 14:03 (2 d)
Engine
blob
Format
Raw Data
Handle
22379377
Attached To
R3600 invenio-infoscience
bfe_bibtex.py
View Options
# -*- coding: utf-8 -*-
##
## $Id$
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# Revision marker of this module. "$Id$" looks like a version-control keyword
# placeholder (presumably expanded on checkout -- confirm for this repository).
__revision__ = "$Id$"
from
invenio.config
import
cdslang
def format(bfo, width="50"):
    """
    Prints a full BibTeX notice.

    'width' must be bigger than or equal to 30.
    This format element is an example of large element, which does
    all the formatting by itself

    The notice is made of the entry type, the BibTeX key and then one
    line (possibly wrapped) per field. Which fields are printed depends
    on the entry type. Fields with an empty value are skipped by
    format_bibtex_field().

    @param bfo the record being formatted; this function calls its
               control_field(), field() and fields() methods
    @param width the width (in number of characters) of the notice.
                 It is converted with int(); values below 30 are raised to 30
    @return the BibTeX notice as a string
    """
    width = int(width)
    if width < 30:
        width = 30

    name_width = 19
    value_width = width - name_width
    recID = bfo.control_field('001')

    def field_line(name, value):
        """Format one BibTeX field with the column widths of this notice."""
        return format_bibtex_field(name, value, name_width, value_width)

    def join_non_empty(values):
        """Join with '. ' the values that are not the empty string."""
        return ". ".join([value for value in values if value != ""])

    def first_extracted(extract, tags):
        """
        Apply 'extract' to the value of each tag in turn and return the
        first non-empty result ("" when every tag gives an empty result).
        """
        found = ""
        for tag in tags:
            found = extract(bfo.field(tag))
            if found != "":
                break
        return found

    #Print entry type
    import invenio.bibformat_elements.bfe_collection as bfe_collection
    collection = bfe_collection.format(bfo=bfo, kb="DBCOLLID2BIBTEX")
    if collection == "":
        # NOTE(review): the entry is printed as "article" but 'collection'
        # stays empty, so none of the article-specific fields below
        # (journal, number, volume, pages) are emitted for it. Behaviour
        # kept as is; confirm whether this is intended.
        out = "@" + "article" + "{"
    else:
        out = "@" + collection + "{"

    #Print BibTeX key
    out += _bibtex_key(bfo, recID) + ","

    #Print authors
    #If author cannot be found, print a field key=recID
    import invenio.bibformat_elements.bfe_authors as bfe_authors
    authors = bfe_authors.format(bfo=bfo,
                                 limit="",
                                 separator=" and ",
                                 extension="",
                                 print_links="no")
    if authors == "":
        out += field_line("key", recID)
    else:
        out += field_line("author", authors)

    #Print editors
    import invenio.bibformat_elements.bfe_editors as bfe_editors
    editors = bfe_editors.format(bfo=bfo,
                                 limit="",
                                 separator=" and ",
                                 extension="",
                                 print_links="no")
    out += field_line("editor", editors)

    #Print title
    import invenio.bibformat_elements.bfe_title as bfe_title
    title = bfe_title.format(bfo=bfo, separator=". ")
    out += field_line("title", title)

    #Print institution
    if collection == "techreport":
        out += field_line("institution", bfo.field("269.b"))

    #Print organization
    if collection in ("inproceedings", "proceedings"):
        organization = join_non_empty([bfo.field("260.b"),
                                       bfo.field("269.b")])
        out += field_line("organization", organization)

    #Print publisher
    if collection in ("book", "inproceedings", "proceedings"):
        import invenio.bibformat_elements.bfe_publisher as bfe_publisher
        publishers = join_non_empty([bfe_publisher.format(bfo=bfo),
                                     bfo.field("269.b"),
                                     bfo.field("933.b"),
                                     bfo.field("934.b")])
        out += field_line("publisher", publishers)

    #Print journal
    if collection == "article":
        journals = join_non_empty([bfo.field("773.p"),
                                   bfo.field("909C4.p")])
        out += field_line("journal", journals)

    #Print school
    if collection == "phdthesis":
        out += field_line("school", bfo.field("502.b"))

    #Print address
    if collection in ("book", "inproceedings", "proceedings",
                      "phdthesis", "techreport"):
        addresses = join_non_empty([bfo.field("260.a"),
                                    bfo.field("269.a"),
                                    bfo.field("933.a"),
                                    bfo.field("934.a")])
        out += field_line("address", addresses)

    #Print number
    if collection in ("techreport", "article"):
        # All the values of 088.a are used here, joined into a single entry
        additional_report_numbers = ". ".join(bfo.fields("088.a"))
        numbers = join_non_empty([bfo.field("037.a"),
                                  additional_report_numbers,
                                  bfo.field("773.n"),
                                  bfo.field("909C4.n")])
        out += field_line("number", numbers)

    #Print volume
    if collection in ("article", "book"):
        volumes = join_non_empty([bfo.field("773.v"),
                                  bfo.field("909C4.v")])
        out += field_line("volume", volumes)

    #Print series
    if collection == "book":
        out += field_line("series", bfo.field("490.a"))

    #Print pages
    if collection in ("article", "inproceedings"):
        pages = join_non_empty([bfo.field("773.c"),
                                bfo.field("909C4.c"),
                                bfo.field("300.a")])
        out += field_line("pages", pages)

    #Print month (first date field from which a month can be extracted)
    month = first_extracted(get_month, ("269.c", "260.c", "502.c"))
    out += field_line("month", month)

    #Print year (first date field from which a year can be extracted)
    year = first_extracted(get_year, ("269.c", "260.c", "502.c", "909C0.y"))
    out += field_line("year", year)

    #Print note
    out += field_line("note", bfo.field("500.a"))

    out += "\n}"

    return out


def _bibtex_key(bfo, recID):
    """
    Builds the BibTeX key of a record.

    Try to have: author_name:recID
    If author_name cannot be found, use primary_report_number
    If primary_report_number cannot be found, use additional_report_number
    If additional_report_number cannot be found, use title:recID
    If title cannot be found, use only recID

    The construction of this key is inherited from old BibTeX format
    written in EL, in old BibFormat.

    @param bfo the record being formatted (its field() method is used)
    @param recID the record identifier, as a string
    @return the key as a string
    """
    #Main author first, then additional authors
    for tag in ("100.a", "700.a"):
        author = bfo.field(tag)
        if author != "":
            return get_name(author) + ":" + recID

    #Primary report number, then additional report number. Each one is
    #used as is (the previous code returned the empty primary report
    #number when only the additional one was available).
    for tag in ("037.a", "088.a"):
        report_number = bfo.field(tag)
        if report_number != "":
            return report_number

    title = bfo.field("245.a")
    if title != "":
        return get_name(title) + ":" + recID

    return recID
def format_bibtex_field(name, value, name_width=20, value_width=40):
    """
    Formats a name and value to display as BibTeX field.

    'name_width' is the width of the name of the field (everything
    before " = " on first line). 'value_width' is the width of
    everything after " = ".

    A newline and 6 empty chars are printed before the name, then the
    name, and then it is filled with spaces to meet the required width
    (the newline counts in that width). 'name_width' is raised to
    6 + len(name) when smaller, and 'value_width' is raised to 2 when
    smaller. Then " = " is printed (notice spaces), followed by the
    quoted value and a trailing comma.

    The quoted value is cut at spaces so that lines stay within
    'value_width' where possible. A word longer than a line is not
    broken: the line is extended up to the next space. Continuation
    lines are indented with name_width + 2 spaces and start with the
    space at which the value was cut.

    If value is None or the empty string, then return empty string.

    For example format_bibtex_field('author', 'a long value for this
    record', 13, 15) returns (first line is empty):

          author = "a long value
                    for this
                    record",

    @param name the name of the BibTeX field
    @param value the value of the field, as a string (or None)
    @param name_width the width reserved for the name of the field
    @param value_width the width reserved for the value of the field
    @return the formatted field as a string, or "" if there is no value
    """
    if name_width < 6 + len(name):
        name_width = 6 + len(name)
    if value_width < 2:
        value_width = 2
    if value is None or value == "":
        return ""

    #format name: new line, 6 blanks of indentation, then padding
    name = "\n      " + name
    name = name.ljust(name_width)

    #format value
    value = '"' + value + '"' #Add quotes to value
    value_lines = []
    last_cut = 0
    cursor = value_width - 1 #First line is smaller because of quote
    increase = False
    while cursor < len(value):
        if cursor == last_cut:
            #Case where word is bigger than the max number of chars
            #per line: walked back to the start of the line without
            #finding a space, so look forward for the next one instead
            increase = True
            cursor = last_cut + value_width - 1

        if value[cursor] != " " and not increase:
            cursor -= 1
        elif value[cursor] != " " and increase:
            cursor += 1
        else:
            #On a space: cut here. The space stays at the beginning
            #of the next line.
            value_lines.append(value[last_cut:cursor])
            last_cut = cursor
            cursor += value_width
            increase = False

    #Take rest of string
    last_line = value[last_cut:]
    if last_line != "":
        value_lines.append(last_line)

    tabs = "".ljust(name_width + 2)
    value = ("\n" + tabs).join(value_lines)

    return name + ' = ' + value + ","
def get_name(string):
    """
    Tries to return the last name contained in a string.

    When 'string' contains a comma, the text before the first comma is
    returned with its spaces removed. Otherwise the longest
    whitespace-separated word of 'string' is returned (the first one
    in case of a tie, "" if there is no word at all).

    Behaviour inherited from old GET_NAME function defined as UFD in
    old BibFormat. We need to return the same value, to keep back
    compatibility with already generated BibTeX notices.

    Eg: get_name("Østlund, Øyvind B") returns "Østlund".

    @param string the text to extract the name from
    @return the extracted name
    """
    comma_position = string.find(',')
    if comma_position != -1:
        #Comma found: keep what precedes it, without spaces
        return string[:comma_position].replace(" ", "")

    #Comma not found.
    #Split around any space and keep the longest word
    candidates = string.split()
    if not candidates:
        return ""
    return max(candidates, key=len)
def get_year(date, default=""):
    """
    Returns the year from a textual date retrieved from a record

    The year is the first run of 4 consecutive digits found in 'date';
    it is returned as a 4 digits string.
    If no such run exists, 'default' is returned.

    @param date the textual date to retrieve the year from
    @param default a default value to return if year not found
    @return the year as a string, or 'default'
    """
    import re

    match = re.search(r'\d\d\d\d', date)
    if match is None:
        return default
    return match.group()
def get_month(date, ln=cdslang, default=""):
    """
    Returns the month from a textual date retrieved from a record

    The returned value is the short month name in language 'ln', as
    given by get_i18n_month_name(month_nb, "short", ln).
    If month cannot be found, returns 'default'

    Three strategies are tried in turn:
      1. look for a textual month, using the month names that
         get_i18n_month_name() returns with its default arguments
      2. look for a month given as number, like in 2004/03/08 or
         17 02 2004
      3. look for a textual month in each language returned by
         language_list_long()

    Each strategy is best effort: if it fails for any reason, the next
    one is tried.

    @param date the textual date to retrieve the month from
    @param ln the language of the returned month name
    @param default a default value to return if month not found
    @return the short month name, or 'default'
    """
    import re
    from invenio.dateutils import get_i18n_month_name
    from invenio.messages import language_list_long

    def month_from_names(month_names):
        """
        Look in 'date' for one of the 12 lowercased 'month_names'.
        Return the short month name in language 'ln', or None when no
        month could be recognised.
        """
        #Names are escaped so that characters which are special in a
        #regular expression are matched literally.
        #Short names also match the beginning of long month names.
        alternatives = r'|'.join([re.escape(month_name)
                                  for month_name in month_names])
        names_pattern = re.compile(r'(' + alternatives + r')',
                                   re.IGNORECASE) # (jan|feb|mar|...)
        found = names_pattern.search(date)
        if found is None:
            return None
        try:
            month_nb = month_names.index(found.group().lower()) + 1
            return get_i18n_month_name(month_nb, "short", ln)
        except Exception:
            #Best effort: let the caller try its next strategy
            return None

    #Look for textual month like "Jan" or "sep" or "November" or "novem"
    #Limit to the default language first (most probable date)
    short_months = [get_i18n_month_name(month).lower()
                    for month in range(1, 13)] # ["jan","feb","mar",...]
    month_name = month_from_names(short_months)
    if month_name is not None:
        return month_name

    #Look for month specified as number in the form 2004/03/08 or 17 02 2004
    #(always take second group of 2 or 1 digits separated by spaces or - etc.)
    month_pattern = re.compile(r'\d([\s]|[-/.,])+(?P<month>(\d){1,2})([\s]|[-/.,])')
    found = month_pattern.search(date)
    if found is not None:
        try:
            month_nb = int(found.group("month"))
            return get_i18n_month_name(month_nb, "short", ln)
        except Exception:
            #Best effort (for instance a number that is not a valid
            #month): fall through to the next strategy
            pass

    #Look for textual month like "Jan" or "sep" or "November" or "novem"
    #Look for the month in each language
    for language in language_list_long():
        lang = language[0] #Language code, like 'en', 'fr', 'de', ...
        short_months = [get_i18n_month_name(month, "short", lang).lower()
                        for month in range(1, 13)] # ["jan","feb","mar",...]
        month_name = month_from_names(short_months)
        if month_name is not None:
            return month_name

    return default
Event Timeline
Log In to Comment