bfe_bibtex.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Sun, Nov 17, 14:03

bfe_bibtex.py
View Options

	# -*- coding: utf-8 -*-
	##
	## $Id$
	##
	## This file is part of CDS Invenio.
	## Copyright (C) 2002, 2003, 2004, 2005, 2006 CERN.
	##
	## CDS Invenio is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## CDS Invenio is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	__revision__ = "$Id$"

	from invenio.config import cdslang

	def format(bfo, width="50"):
	    """
	    Prints a full BibTeX notice.

	    This format element is an example of large element, which does
	    all the formatting by itself.

	    The entry type is what bfe_collection returns for the
	    "DBCOLLID2BIBTEX" knowledge base. When that is empty the record is
	    handled as an "article", both for the entry header and for the
	    choice of the type-specific fields (journal, number, volume, pages).

	    The BibTeX key is the first available of:
	      - get_name(author):recID (field 100.a, then 700.a)
	      - primary report number (037.a)
	      - additional report number (088.a)
	      - get_name(title):recID (245.a)
	      - recID alone
	    The construction of this key is inherited from the old BibTeX format
	    written in EL, in old BibFormat.

	    @param bfo the BibFormat object giving access to the record
	    @param width the width (in number of characters) of the notice.
	                 Values smaller than 30 are replaced by 30.
	    @return the BibTeX notice as a string
	    """
	    width = int(width)
	    if width < 30:
	        width = 30

	    name_width = 19
	    value_width = width - name_width
	    recID = bfo.control_field('001')

	    def non_empty(values):
	        # Values that are not the empty string, order preserved
	        return [value for value in values if value != ""]

	    def joined_fields(*tags):
	        # Non-empty values of the given tags, joined with ". "
	        return ". ".join(non_empty([bfo.field(tag) for tag in tags]))

	    def first_non_empty(extract, tags):
	        # First non-empty extract(bfo.field(tag)), or "" if none
	        for tag in tags:
	            found = extract(bfo.field(tag))
	            if found != "":
	                return found
	        return ""

	    #Entry type
	    import invenio.bibformat_elements.bfe_collection as bfe_collection
	    collection = bfe_collection.format(bfo=bfo, kb="DBCOLLID2BIBTEX")
	    if collection == "":
	        # Default type. Assign it to 'collection' too, so that the
	        # article-specific fields below are printed for this entry.
	        collection = "article"

	    #BibTeX key
	    author = bfo.field("100.a")
	    if author == "":
	        author = bfo.field("700.a")
	    if author != "":
	        key = get_name(author) + ":" + recID
	    else:
	        key = bfo.field("037.a")
	        if key == "":
	            # Fall back on the additional report number itself
	            key = bfo.field("088.a")
	        if key == "":
	            title = bfo.field("245.a")
	            if title != "":
	                key = get_name(title) + ":" + recID
	            else:
	                key = recID

	    parts = ["@", collection, "{", key, ","]

	    def add_field(name, value):
	        # format_bibtex_field returns "" for an empty value, so empty
	        # fields simply do not show up in the notice.
	        parts.append(format_bibtex_field(name, value, name_width, value_width))

	    #Authors
	    #If author cannot be found, print a field key=recID
	    import invenio.bibformat_elements.bfe_authors as bfe_authors
	    authors = bfe_authors.format(bfo=bfo, limit="", separator=" and ",
	                                 extension="", print_links="no")
	    if authors == "":
	        add_field("key", recID)
	    else:
	        add_field("author", authors)

	    #Editors
	    import invenio.bibformat_elements.bfe_editors as bfe_editors
	    editors = bfe_editors.format(bfo=bfo, limit="", separator=" and ",
	                                 extension="", print_links="no")
	    add_field("editor", editors)

	    #Title
	    import invenio.bibformat_elements.bfe_title as bfe_title
	    add_field("title", bfe_title.format(bfo=bfo, separator = ". "))

	    #Institution
	    if collection == "techreport":
	        add_field("institution", bfo.field("269.b"))

	    #Organization
	    if collection in ("inproceedings", "proceedings"):
	        add_field("organization", joined_fields("260.b", "269.b"))

	    #Publisher
	    if collection in ("book", "inproceedings", "proceedings"):
	        import invenio.bibformat_elements.bfe_publisher as bfe_publisher
	        publishers = [bfe_publisher.format(bfo=bfo),
	                      bfo.field("269.b"),
	                      bfo.field("933.b"),
	                      bfo.field("934.b")]
	        add_field("publisher", ". ".join(non_empty(publishers)))

	    #Journal
	    if collection == "article":
	        add_field("journal", joined_fields("773.p", "909C4.p"))

	    #School
	    if collection == "phdthesis":
	        add_field("school", bfo.field("502.b"))

	    #Address
	    if collection in ("book", "inproceedings", "proceedings",
	                      "phdthesis", "techreport"):
	        add_field("address",
	                  joined_fields("260.a", "269.a", "933.a", "934.a"))

	    #Number
	    if collection in ("techreport", "article"):
	        numbers = [bfo.field("037.a"),
	                   # All the additional report numbers, not only one
	                   ". ".join(bfo.fields("088.a")),
	                   bfo.field("773.n"),
	                   bfo.field("909C4.n")]
	        add_field("number", ". ".join(non_empty(numbers)))

	    #Volume
	    if collection in ("article", "book"):
	        add_field("volume", joined_fields("773.v", "909C4.v"))

	    #Series
	    if collection == "book":
	        add_field("series", bfo.field("490.a"))

	    #Pages
	    if collection in ("article", "inproceedings"):
	        add_field("pages", joined_fields("773.c", "909C4.c", "300.a"))

	    #Month: first date field from which a month can be extracted
	    add_field("month", first_non_empty(get_month, ("269.c", "260.c", "502.c")))

	    #Year: same date fields as for the month, then 909C0.y
	    add_field("year",
	              first_non_empty(get_year, ("269.c", "260.c", "502.c", "909C0.y")))

	    #Note
	    add_field("note", bfo.field("500.a"))

	    parts.append("\n}")

	    return "".join(parts)


	def format_bibtex_field(name, value, name_width=20, value_width=40):
	    """
	    Formats a name and value to display as BibTeX field.

	    'name_width' is the width of the name of the field (everything
	    before " = " on first line, leading line break included).
	    'value_width' is the width of everything after " = ".

	    A line break and 6 empty chars are printed before the name, then the
	    name, and then it is filled with spaces to meet the required width.
	    'name_width' is raised to 6 + len(name) when smaller, and
	    'value_width' is raised to 2 when smaller.

	    Then " = " is printed (notice spaces), followed by the value
	    enclosed in double quotes and a trailing comma.

	    The value is cut on spaces so that lines do not exceed 'value_width'
	    chars. A word longer than 'value_width' is kept whole on its own
	    line. Continuation lines are indented with name_width + 2 spaces
	    and start with the space on which the value was cut.

	    If value is None or the empty string, then return empty string.

	    For example
	    format_bibtex_field('author', 'a long value for this record', 13, 15)
	    returns (after a leading line break):

	          author = "a long value
	                    for this
	                    record",

	    @param name the name of the BibTeX field
	    @param value the text of the field (without quotes)
	    @param name_width the minimal width of the name column
	    @param value_width the maximal width of a line of the value
	    @return the formatted field, or "" if there is no value
	    """
	    if value is None or value == "":
	        return ""
	    if name_width < 6 + len(name):
	        name_width = 6 + len(name)
	    if value_width < 2:
	        value_width = 2

	    #format name: line break, 6 blanks, name, padding
	    label = ("\n" + " " * 6 + name).ljust(name_width)

	    #format value
	    quoted = '"' + value + '"'
	    lines = []
	    last_cut = 0
	    cursor = value_width - 1 #First line is smaller because of quote
	    forward = False #True while looking for the end of a too long word
	    while cursor < len(quoted):
	        if cursor == last_cut:
	            #No space found going backward: the word is bigger than
	            #the max number of chars per line. Restart from the end of
	            #the line and look forward for the next space instead.
	            forward = True
	            cursor = last_cut + value_width - 1

	        if quoted[cursor] == " ":
	            #Cut here. The space stays at the beginning of next line.
	            lines.append(quoted[last_cut:cursor])
	            last_cut = cursor
	            cursor += value_width
	            forward = False
	        elif forward:
	            cursor += 1
	        else:
	            cursor -= 1

	    #Take rest of string
	    remainder = quoted[last_cut:]
	    if remainder != "":
	        lines.append(remainder)

	    #Continuation lines are aligned with the opening quote
	    indent = " " * (name_width + 2)

	    return label + ' = ' + ("\n" + indent).join(lines) + ","

	def get_name(string):
	    """
	    Tries to return the last name contained in a string.

	    When 'string' contains a comma, the text placed before the first
	    comma is returned, with its spaces removed. Otherwise the longest
	    whitespace-separated word of 'string' is returned (the first one
	    in case of equal lengths, "" if there is no word at all).

	    Behaviour inherited from old GET_NAME function defined as UFD in
	    old BibFormat. We need to return the same value, to keep back
	    compatibility with already generated BibTeX notices.

	    Eg: get_name("Ellis, John R") returns "Ellis".

	    @param string the text to extract the name from
	    @return the extracted name
	    """
	    if ',' in string:
	        #Keep what precedes the first comma
	        return string.split(',', 1)[0].replace(" ", "")

	    #Comma not found: split around any space
	    candidates = string.split()
	    if not candidates:
	        return ""
	    #max() keeps the first of the longest words
	    return max(candidates, key=len)


	def get_year(date, default=""):
	    """
	    Extracts the year from a textual date retrieved from a record.

	    The year is the first run of 4 consecutive digits found in
	    'date'. It is returned as a string.
	    If no such run exists, 'default' is returned.

	    @param date the textual date to retrieve the year from
	    @param default a default value to return if year not found
	    @return the 4 digits of the year, or 'default'
	    """
	    import re
	    found = re.search(r'\d\d\d\d', date)
	    if found is None:
	        return default
	    return found.group()

	def _find_month_number(date, month_names):
	    """
	    Returns the number (1-12) of the first month of 'month_names'
	    whose name occurs in 'date' (case insensitive), or None.

	    @param date the text to search in
	    @param month_names lower-cased month names, January first
	    """
	    import re
	    #Names are escaped: abbreviations may contain chars such as "."
	    alternatives = [re.escape(month_name) for month_name in month_names
	                    if month_name != ""]
	    if not alternatives:
	        return None
	    #Plain "|" is the alternation operator: "\|" would be a literal pipe
	    found = re.search("(" + "|".join(alternatives) + ")", date, re.IGNORECASE)
	    if found is None:
	        return None
	    matched = found.group().lower()
	    if matched not in month_names:
	        #Case folding of the match differs from the stored name
	        return None
	    return month_names.index(matched) + 1

	def get_month(date, ln=cdslang, default=""):
	    """
	    Returns the month from a textual date retrieved from a record

	    The returned value is the short month name in language 'ln', as
	    given by get_i18n_month_name.
	    If month cannot be found, returns 'default'

	    The month is looked for in this order:
	      1. a month name as returned by get_i18n_month_name with its
	         default arguments (presumably the short names in cdslang,
	         to be confirmed against invenio.dateutils). Long names
	         that contain the short one match too.
	      2. a number, in dates such as 2004/03/08 or 17 02 2004
	      3. a short month name in any language of language_list_long()

	    @param date the textual date to retrieve the month from
	    @param ln the language of the returned month name
	    @param default a default value to return if month not found
	    """
	    import re
	    from invenio.dateutils import get_i18n_month_name
	    from invenio.messages import language_list_long

	    def localized(month_nb):
	        #Short name of month 'month_nb' in 'ln', or None if not available
	        if month_nb is None or not 1 <= month_nb <= 12:
	            return None
	        try:
	            return get_i18n_month_name(month_nb, "short", ln)
	        except Exception:
	            #Best effort, as before: let the next strategy have a try
	            return None

	    #Look for textual month like "Jan" or "sep" or "November" or "novem"
	    #Limit to default language first (most probable date)
	    short_months = [get_i18n_month_name(month).lower()
	                    for month in range(1, 13)] # ["jan","feb","mar",...]
	    month_name = localized(_find_month_number(date, short_months))
	    if month_name is not None:
	        return month_name

	    #Look for month specified as number in the form 2004/03/08 or 17 02 2004
	    #(always take second group of 2 or 1 digits separated by spaces or - etc.)
	    found = re.search(r'\d[\s\-/.,]+(?P<month>\d{1,2})[\s\-/.,]', date)
	    if found is not None:
	        month_name = localized(int(found.group("month")))
	        if month_name is not None:
	            return month_name

	    #Look for textual month like "Jan" or "sep" or "November" or "novem"
	    #Look for the month in each language
	    for lang in [x[0] for x in language_list_long()]: # 'en', 'fr', 'de', ...
	        short_months = [get_i18n_month_name(month, "short", lang).lower()
	                        for month in range(1, 13)]
	        month_name = localized(_find_month_number(date, short_months))
	        if month_name is not None:
	            return month_name

	    return default

bfe_bibtex.pyNo OneTemporaryActions

File Metadata

bfe_bibtex.pyView Options

Event Timeline

bfe_bibtex.py
No OneTemporary
Actions

bfe_bibtex.py
View Options